From e03865b453b63d38e9960fcfc8eb0181bfe27f78 Mon Sep 17 00:00:00 2001 From: mateusz <1+mateusz@noreply.localhost> Date: Thu, 14 May 2026 21:16:57 +0200 Subject: [PATCH] revert 6475f767875c778e5ab38584b56f617d5b594359 revert Add L2 incident triage report wrapper --- CHANGELOG.md | 1 - README.md | 2 +- infra-run/README.md | 2 +- infra-run/examples/README.md | 1 - .../l2-incident-triage-report.sample.md | 131 ------ .../scripts/bash/incident-checks/README.md | 16 - .../bash/incident-checks/check_high_cpu.sh | 2 +- .../incident-checks/incident_triage_report.sh | 385 ------------------ 8 files changed, 3 insertions(+), 537 deletions(-) delete mode 100644 infra-run/examples/incident-triage/l2-incident-triage-report.sample.md delete mode 100755 infra-run/scripts/bash/incident-checks/incident_triage_report.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cb8eec..b60d04a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,6 @@ - `journal-analyzer` for exported `journalctl` log review. - `known-error-matcher` with JSON-based known error patterns. - Standalone Bash incident checks for CPU, memory/OOM, service restart loops, failed SSH logins, certificate expiry, DNS connectivity, NTP drift, read-only filesystems, inode usage, and JVM process diagnostics. -- `incident_triage_report.sh` for L2 Markdown incident handover reports built from existing Bash incident checks. - Repository-level Codex guidance: - `AGENTS.md` - `docs/codex/README.md` diff --git a/README.md b/README.md index 39639bb..eee2af3 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ It is a technical portfolio, not a production toolkit. The examples show how ope - [infra-run](./infra-run/) - the main implemented project in this repository. - [Linux healthcheck scripts](./infra-run/scripts/bash/os-healthcheck/) - host, disk, service, network, and report helpers. -- [Bash incident checks](./infra-run/scripts/bash/incident-checks/) - standalone read-only checks for common Linux incidents, plus an L2 Markdown triage report wrapper for repeatable handoff and ticket evidence. +- [Bash incident checks](./infra-run/scripts/bash/incident-checks/) - standalone read-only checks for common Linux incidents, designed for copy-to-server triage and ticket evidence. - [Disk full workflow](./infra-run/scripts/bash/disk-full/) - triage scripts for usage, inode pressure, deleted open files, large files, log cleanup review, and postchecks. - [Veritas examples](./infra-run/scripts/bash/veritas/) - dry-run-first VxVM/VCS storage expansion workflow examples. - [GPFS examples](./infra-run/scripts/bash/gpfs/) - dry-run-first IBM Spectrum Scale expansion workflow examples. diff --git a/infra-run/README.md b/infra-run/README.md index 2529419..ea10cac 100644 --- a/infra-run/README.md +++ b/infra-run/README.md @@ -9,7 +9,7 @@ The goal is to show operational judgment, not to ship a universal automation pro ### Bash Operational Scripts - [scripts/bash/os-healthcheck](./scripts/bash/os-healthcheck/) - general Linux health, service, disk, network, and report scripts. -- [scripts/bash/incident-checks](./scripts/bash/incident-checks/) - standalone read-only incident checks for CPU, memory/OOM, SSH failures, TLS expiry, DNS, NTP, filesystems, inodes, services, JVM diagnostics, and an L2 Markdown triage report wrapper. +- [scripts/bash/incident-checks](./scripts/bash/incident-checks/) - standalone read-only incident checks for CPU, memory/OOM, SSH failures, TLS expiry, DNS, NTP, filesystems, inodes, services, and JVM diagnostics. - [scripts/bash/disk-full](./scripts/bash/disk-full/) - disk-full triage and cleanup review workflow. - [scripts/bash/veritas](./scripts/bash/veritas/) - Veritas VxVM/VCS storage expansion workflow examples. - [scripts/bash/gpfs](./scripts/bash/gpfs/) - GPFS / IBM Spectrum Scale expansion workflow examples. diff --git a/infra-run/examples/README.md b/infra-run/examples/README.md index 05ea748..756d894 100644 --- a/infra-run/examples/README.md +++ b/infra-run/examples/README.md @@ -7,6 +7,5 @@ These files use fake hostnames, reserved example domains, reserved IP address ra ## Included - `disk-full/` - sample filesystem usage, deleted open files, and a short after-action report. -- `incident-triage/` - sample L2 incident triage report for repeatable handoff and ticket evidence. - `veritas/` - sample VxVM disk and VCS service group output. - `gpfs/` - sample GPFS cluster and NSD output. diff --git a/infra-run/examples/incident-triage/l2-incident-triage-report.sample.md b/infra-run/examples/incident-triage/l2-incident-triage-report.sample.md deleted file mode 100644 index 41eeef1..0000000 --- a/infra-run/examples/incident-triage/l2-incident-triage-report.sample.md +++ /dev/null @@ -1,131 +0,0 @@ -# L2 Incident Triage Report - -- Generated: 2026-05-12T19:30:00Z -- Local hostname: app01.example.internal -- Current user: triage -- Incident type: all -- Service: nginx -- Host: app.example.com -- Port: 443 -- PID: not provided -- Process match: not provided -- Since: 30 minutes ago - -## Executed Checks - -| Check | Script | Status | Exit | Command | -| --- | --- | --- | --- | --- | -| CPU saturation | `check_high_cpu.sh` | OK | 0 | `./check_high_cpu.sh` | -| Memory and OOM | `check_high_memory_oom.sh` | WARNING | 1 | `./check_high_memory_oom.sh --since "30 minutes ago"` | -| Service restart loop | `check_service_restart_loop.sh` | OK | 0 | `./check_service_restart_loop.sh --service nginx --since "30 minutes ago"` | -| DNS and connectivity | `check_dns_connectivity.sh` | OK | 0 | `./check_dns_connectivity.sh --host app.example.com --port 443` | -| Failed SSH logins | `check_failed_ssh_logins.sh` | OK | 0 | `./check_failed_ssh_logins.sh --since "30 minutes ago"` | -| Certificate expiry | `check_certificate_expiry.sh` | OK | 0 | `./check_certificate_expiry.sh --host app.example.com --port 443` | -| Read-only filesystems | `check_filesystem_readonly.sh` | OK | 0 | `./check_filesystem_readonly.sh` | -| Inode usage | `check_inode_usage.sh` | OK | 0 | `./check_inode_usage.sh` | -| JVM threads and heap | `check_jvm_threads_heap.sh` | WARNING | 1 | `./check_jvm_threads_heap.sh` | - -## Summary - -- CPU saturation: OK: 1-minute load is 0.42 across 4 CPU(s) (10% of CPU count) -- Memory and OOM: WARNING: Memory usage is 84% and swap usage is 12% -- Service restart loop: OK: Service nginx state=active substate=running restarts=0 -- DNS and connectivity: OK: DNS=OK ping=OK tcp_443=OK -- Failed SSH logins: OK: Found 2 failed SSH login attempt(s) for requested window -- Certificate expiry: OK: Certificate for app.example.com:443 expires in 74 day(s) -- Read-only filesystems: OK: Found 0 read-only filesystem(s) -- Inode usage: OK: Highest inode usage is 42% -- JVM threads and heap: WARNING: No Java processes detected - -## Raw Evidence - -### CPU saturation - -Script: `check_high_cpu.sh` - -Command: `./check_high_cpu.sh` - -Status: OK, exit: 0 - -```text -OK: 1-minute load is 0.42 across 4 CPU(s) (10% of CPU count) - -Load average: -1m=0.42 5m=0.38 15m=0.31 - -Top CPU processes: -PID PPID USER %CPU %MEM COMMAND ARGS -1450 1 app 7.2 2.1 nginx nginx: worker process - -Recommended next steps: -- Check process ownership and whether the top process is expected -- Review logs for the top CPU-consuming process -``` - -### Memory and OOM - -Script: `check_high_memory_oom.sh` - -Command: `./check_high_memory_oom.sh --since "30 minutes ago"` - -Status: WARNING, exit: 1 - -```text -WARNING: Memory usage is 84% and swap usage is 12% - -Memory summary: -Mem: 15800 13272 1110 210 1418 1840 -Swap: 4095 512 3583 - -OOM events since 30 minutes ago: -OK: no OOM evidence found in available sources -``` - -### Service restart loop - -Script: `check_service_restart_loop.sh` - -Command: `./check_service_restart_loop.sh --service nginx --since "30 minutes ago"` - -Status: OK, exit: 0 - -```text -OK: Service nginx state=active substate=running restarts=0 - -Systemd properties: -Id=nginx.service -ActiveState=active -SubState=running -NRestarts=0 -``` - -### Skipped or limited checks - -```text -JVM threads and heap returned WARNING because no Java process was detected. -No destructive commands were run. No service restarts, process kills, remounts, or configuration changes were attempted. -``` - -## L2 Handover Checklist - -- [ ] Business impact confirmed -- [ ] Affected host/service identified -- [ ] Monitoring alert attached -- [ ] Recent changes checked -- [ ] Logs attached -- [ ] Service owner identified -- [ ] Escalation target identified - -## Escalation Notes - -- Escalate when impact is active, spreading, customer-facing, or outside L2 access. -- Include the alert, timeline, commands run, and the raw evidence above. -- Call out skipped checks and missing inputs so the next responder does not repeat the same gap. -- Do not restart, kill, remount, or rotate anything unless the incident owner approves the action. - -## Recommended Next Steps - -- Confirm the symptom against monitoring and user reports. -- Compare this point-in-time evidence with recent deploys, config changes, and host events. -- Attach this report to the incident ticket before handoff. -- If escalation is needed, include exact hostnames, service names, timestamps, and observed impact. diff --git a/infra-run/scripts/bash/incident-checks/README.md b/infra-run/scripts/bash/incident-checks/README.md index 0f8894b..7851e8f 100644 --- a/infra-run/scripts/bash/incident-checks/README.md +++ b/infra-run/scripts/bash/incident-checks/README.md @@ -16,7 +16,6 @@ They favor standard tools found on RHEL-like and Debian/Ubuntu systems. Optional - `check_filesystem_readonly.sh` - read-only filesystem detection. - `check_inode_usage.sh` - inode pressure and top affected mount points. - `check_jvm_threads_heap.sh` - lightweight JVM process, heap, and thread diagnostics. -- `incident_triage_report.sh` - wrapper that runs selected checks and writes a single Markdown L2 handover report. ## Usage Examples @@ -52,21 +51,8 @@ They favor standard tools found on RHEL-like and Debian/Ubuntu systems. Optional ./check_jvm_threads_heap.sh ./check_jvm_threads_heap.sh --pid 1234 ./check_jvm_threads_heap.sh --match app-name - -./incident_triage_report.sh --type cpu -./incident_triage_report.sh --type service --service nginx --since "30 minutes ago" -./incident_triage_report.sh --type network --host app.example.com --port 443 -./incident_triage_report.sh --type all --service nginx --host app.example.com --port 443 --output triage.md ``` -## L2 Triage Report Wrapper - -`incident_triage_report.sh` collects selected incident checks into one Markdown report. It is useful for L2 mentoring, repeatable triage, and ticket evidence because it keeps the command list, point-in-time output, handover checklist, escalation notes, and recommended next steps in one place. - -Supported report types are `cpu`, `memory`, `service`, `network`, `auth`, `cert`, `filesystem`, `jvm`, and `all`. - -The wrapper is read-only apart from writing the requested `--output` file. It does not require root and skips checks safely when an underlying script is missing, not executable, or missing required context such as `--service` or `--host`. - ## Exit Codes - `0` - OK. @@ -120,5 +106,3 @@ Sanitized examples are available in [examples](./examples/): - `filesystem-readonly.sample.txt` - `inode-usage.sample.txt` - `jvm-threads-heap.sample.txt` - -A sanitized report sample is available at [../../../examples/incident-triage/l2-incident-triage-report.sample.md](../../../examples/incident-triage/l2-incident-triage-report.sample.md). diff --git a/infra-run/scripts/bash/incident-checks/check_high_cpu.sh b/infra-run/scripts/bash/incident-checks/check_high_cpu.sh index 6fd3e21..9ac8fb8 100755 --- a/infra-run/scripts/bash/incident-checks/check_high_cpu.sh +++ b/infra-run/scripts/bash/incident-checks/check_high_cpu.sh @@ -92,7 +92,7 @@ load_15m="unavailable" load_per_cpu_pct=0 if [[ -r /proc/loadavg ]]; then read -r load_1m load_5m load_15m _ < /proc/loadavg - load_per_cpu_pct="$(awk -v load_avg="$load_1m" -v cpus="$cpu_count" 'BEGIN { printf "%d", (load_avg / cpus) * 100 }')" + load_per_cpu_pct="$(awk -v load="$load_1m" -v cpus="$cpu_count" 'BEGIN { printf "%d", (load / cpus) * 100 }')" elif command -v uptime >/dev/null 2>&1; then load_line="$(uptime 2>/dev/null || true)" load_1m="$(printf '%s\n' "$load_line" | sed -n 's/.*load average[s]*: *\([^,]*\).*/\1/p')" diff --git a/infra-run/scripts/bash/incident-checks/incident_triage_report.sh b/infra-run/scripts/bash/incident-checks/incident_triage_report.sh deleted file mode 100755 index 0ef53d1..0000000 --- a/infra-run/scripts/bash/incident-checks/incident_triage_report.sh +++ /dev/null @@ -1,385 +0,0 @@ -#!/usr/bin/env bash -set -o errexit -set -o nounset -set -o pipefail - -incident_type="" -service_name="" -host_name="" -port="" -target_pid="" -match_string="" -output_file="" -since_value="1 hour ago" - -script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -usage() { - cat <<'USAGE' -Usage: incident_triage_report.sh --type TYPE [options] - -Run selected read-only incident checks and produce a Markdown triage report. - -Incident types: - cpu - memory - service - network - auth - cert - filesystem - jvm - all - -Options: - --type TYPE Incident type to collect - --service SERVICE_NAME systemd service name for service checks - --host HOSTNAME_OR_FQDN host for DNS, network, or certificate checks - --port PORT TCP or TLS port for host checks - --pid PID JVM process ID - --match PROCESS_MATCH JVM process match string - --output FILE write Markdown report to FILE - --since VALUE time window for log-based checks - --help show this help - -Examples: - ./incident_triage_report.sh --type cpu - ./incident_triage_report.sh --type service --service nginx --since "30 minutes ago" - ./incident_triage_report.sh --type network --host app.example.com --port 443 - ./incident_triage_report.sh --type all --service nginx --host app.example.com --port 443 --output triage.md -USAGE -} - -is_number() { - [[ "$1" =~ ^[0-9]+$ ]] -} - -valid_type() { - case "$1" in - cpu|memory|service|network|auth|cert|filesystem|jvm|all) return 0 ;; - *) return 1 ;; - esac -} - -while (($# > 0)); do - case "$1" in - --type) - [[ $# -ge 2 ]] || { printf 'CRITICAL: --type requires a value\n'; exit 2; } - incident_type="$2" - shift 2 - ;; - --service) - [[ $# -ge 2 ]] || { printf 'CRITICAL: --service requires a value\n'; exit 2; } - service_name="$2" - shift 2 - ;; - --host) - [[ $# -ge 2 ]] || { printf 'CRITICAL: --host requires a value\n'; exit 2; } - host_name="$2" - shift 2 - ;; - --port) - [[ $# -ge 2 ]] || { printf 'CRITICAL: --port requires a value\n'; exit 2; } - port="$2" - shift 2 - ;; - --pid) - [[ $# -ge 2 ]] || { printf 'CRITICAL: --pid requires a value\n'; exit 2; } - target_pid="$2" - shift 2 - ;; - --match) - [[ $# -ge 2 ]] || { printf 'CRITICAL: --match requires a value\n'; exit 2; } - match_string="$2" - shift 2 - ;; - --output) - [[ $# -ge 2 ]] || { printf 'CRITICAL: --output requires a value\n'; exit 2; } - output_file="$2" - shift 2 - ;; - --since) - [[ $# -ge 2 ]] || { printf 'CRITICAL: --since requires a value\n'; exit 2; } - since_value="$2" - shift 2 - ;; - --help|-h) - usage - exit 0 - ;; - *) - printf 'CRITICAL: unknown option: %s\n' "$1" - usage - exit 2 - ;; - esac -done - -if [[ -z "$incident_type" ]]; then - printf 'CRITICAL: --type is required\n' - usage - exit 2 -fi -if ! valid_type "$incident_type"; then - printf 'CRITICAL: unsupported incident type: %s\n' "$incident_type" - usage - exit 2 -fi -if [[ -n "$port" ]] && ! is_number "$port"; then - printf 'CRITICAL: --port must be numeric\n' - exit 2 -fi -if [[ -n "$target_pid" ]] && ! is_number "$target_pid"; then - printf 'CRITICAL: --pid must be numeric\n' - exit 2 -fi -if [[ -n "$target_pid" && -n "$match_string" ]]; then - printf 'CRITICAL: use either --pid or --match for JVM checks, not both\n' - exit 2 -fi - -tmp_dir="$(mktemp -d)" -trap 'rm -rf "$tmp_dir"' EXIT - -report_file="$tmp_dir/report.md" - -check_labels=() -check_names=() -check_commands=() -check_statuses=() -check_exit_codes=() -check_summaries=() -check_outputs=() - -status_from_exit() { - case "$1" in - 0) printf 'OK' ;; - 1) printf 'WARNING' ;; - 2) printf 'INVALID' ;; - 3) printf 'CRITICAL' ;; - *) printf 'ERROR' ;; - esac -} - -render_command() { - local item - for item in "$@"; do - printf '%q ' "$item" - done | sed 's/[[:space:]]*$//' -} - -append_skipped_check() { - local label="$1" - local name="$2" - local reason="$3" - local output_path="$tmp_dir/check_${#check_labels[@]}.txt" - - printf 'SKIPPED: %s\n' "$reason" > "$output_path" - - check_labels+=("$label") - check_names+=("$name") - check_commands+=("not run") - check_statuses+=("SKIPPED") - check_exit_codes+=("-") - check_summaries+=("$reason") - check_outputs+=("$output_path") -} - -run_check() { - local label="$1" - local script_name="$2" - shift 2 - - local script_path="${script_dir}/${script_name}" - local output_path="$tmp_dir/check_${#check_labels[@]}.txt" - local command_text - local exit_code - local status - local summary - - command_text="$(render_command "$script_path" "$@")" - - if [[ ! -e "$script_path" ]]; then - append_skipped_check "$label" "$script_name" "missing script: $script_name" - return - fi - if [[ ! -x "$script_path" ]]; then - append_skipped_check "$label" "$script_name" "script is not executable: $script_name" - return - fi - - set +e - "$script_path" "$@" > "$output_path" 2>&1 - exit_code=$? - set -e - - status="$(status_from_exit "$exit_code")" - summary="$(sed -n '1p' "$output_path")" - if [[ -z "$summary" ]]; then - summary="no output captured" - fi - - check_labels+=("$label") - check_names+=("$script_name") - check_commands+=("$command_text") - check_statuses+=("$status") - check_exit_codes+=("$exit_code") - check_summaries+=("$summary") - check_outputs+=("$output_path") -} - -run_cpu_checks() { - run_check "CPU saturation" "check_high_cpu.sh" -} - -run_memory_checks() { - run_check "Memory and OOM" "check_high_memory_oom.sh" --since "$since_value" -} - -run_service_checks() { - if [[ -z "$service_name" ]]; then - append_skipped_check "Service restart loop" "check_service_restart_loop.sh" "requires --service SERVICE_NAME" - return - fi - run_check "Service restart loop" "check_service_restart_loop.sh" --service "$service_name" --since "$since_value" -} - -run_network_checks() { - local args=(--host "$host_name") - if [[ -z "$host_name" ]]; then - append_skipped_check "DNS and connectivity" "check_dns_connectivity.sh" "requires --host HOSTNAME_OR_FQDN" - return - fi - if [[ -n "$port" ]]; then - args+=(--port "$port") - fi - run_check "DNS and connectivity" "check_dns_connectivity.sh" "${args[@]}" -} - -run_auth_checks() { - run_check "Failed SSH logins" "check_failed_ssh_logins.sh" --since "$since_value" -} - -run_cert_checks() { - local args=(--host "$host_name") - if [[ -z "$host_name" ]]; then - append_skipped_check "Certificate expiry" "check_certificate_expiry.sh" "requires --host HOSTNAME_OR_FQDN" - return - fi - if [[ -n "$port" ]]; then - args+=(--port "$port") - fi - run_check "Certificate expiry" "check_certificate_expiry.sh" "${args[@]}" -} - -run_filesystem_checks() { - run_check "Read-only filesystems" "check_filesystem_readonly.sh" - run_check "Inode usage" "check_inode_usage.sh" -} - -run_jvm_checks() { - local args=() - if [[ -n "$target_pid" ]]; then - args+=(--pid "$target_pid") - elif [[ -n "$match_string" ]]; then - args+=(--match "$match_string") - fi - run_check "JVM threads and heap" "check_jvm_threads_heap.sh" "${args[@]}" -} - -case "$incident_type" in - cpu) run_cpu_checks ;; - memory) run_memory_checks ;; - service) run_service_checks ;; - network) run_network_checks ;; - auth) run_auth_checks ;; - cert) run_cert_checks ;; - filesystem) run_filesystem_checks ;; - jvm) run_jvm_checks ;; - all) - run_cpu_checks - run_memory_checks - run_service_checks - run_network_checks - run_auth_checks - run_cert_checks - run_filesystem_checks - run_jvm_checks - ;; -esac - -generated_at="$(date -u '+%Y-%m-%dT%H:%M:%SZ')" -local_hostname="$(hostname 2>/dev/null || printf 'unknown')" -current_user="$(id -un 2>/dev/null || printf 'unknown')" - -{ - printf '# L2 Incident Triage Report\n\n' - printf -- '- Generated: %s\n' "$generated_at" - printf -- '- Local hostname: %s\n' "$local_hostname" - printf -- '- Current user: %s\n' "$current_user" - printf -- '- Incident type: %s\n' "$incident_type" - printf -- '- Service: %s\n' "${service_name:-not provided}" - printf -- '- Host: %s\n' "${host_name:-not provided}" - printf -- '- Port: %s\n' "${port:-not provided}" - printf -- '- PID: %s\n' "${target_pid:-not provided}" - printf -- '- Process match: %s\n' "${match_string:-not provided}" - printf -- '- Since: %s\n\n' "$since_value" - - printf '## Executed Checks\n\n' - printf '| Check | Script | Status | Exit | Command |\n' - printf '| --- | --- | --- | --- | --- |\n' - for index in "${!check_labels[@]}"; do - printf "| %s | \`%s\` | %s | %s | \`%s\` |\n" \ - "${check_labels[$index]}" \ - "${check_names[$index]}" \ - "${check_statuses[$index]}" \ - "${check_exit_codes[$index]}" \ - "${check_commands[$index]}" - done - printf '\n' - - printf '## Summary\n\n' - for index in "${!check_labels[@]}"; do - printf -- '- %s: %s\n' "${check_labels[$index]}" "${check_summaries[$index]}" - done - printf '\n' - - printf '## Raw Evidence\n\n' - for index in "${!check_labels[@]}"; do - printf '### %s\n\n' "${check_labels[$index]}" - printf "Script: \`%s\`\n\n" "${check_names[$index]}" - printf "Command: \`%s\`\n\n" "${check_commands[$index]}" - printf 'Status: %s, exit: %s\n\n' "${check_statuses[$index]}" "${check_exit_codes[$index]}" - printf '```text\n' - cat "${check_outputs[$index]}" - printf '\n```\n\n' - done - - printf '## L2 Handover Checklist\n\n' - printf -- '- [ ] Business impact confirmed\n' - printf -- '- [ ] Affected host/service identified\n' - printf -- '- [ ] Monitoring alert attached\n' - printf -- '- [ ] Recent changes checked\n' - printf -- '- [ ] Logs attached\n' - printf -- '- [ ] Service owner identified\n' - printf -- '- [ ] Escalation target identified\n\n' - - printf '## Escalation Notes\n\n' - printf -- '- Escalate when impact is active, spreading, customer-facing, or outside L2 access.\n' - printf -- '- Include the alert, timeline, commands run, and the raw evidence above.\n' - printf -- '- Call out skipped checks and missing inputs so the next responder does not repeat the same gap.\n' - printf -- '- Do not restart, kill, remount, or rotate anything unless the incident owner approves the action.\n\n' - - printf '## Recommended Next Steps\n\n' - printf -- '- Confirm the symptom against monitoring and user reports.\n' - printf -- '- Compare this point-in-time evidence with recent deploys, config changes, and host events.\n' - printf -- '- Attach this report to the incident ticket before handoff.\n' - printf -- '- If escalation is needed, include exact hostnames, service names, timestamps, and observed impact.\n' -} > "$report_file" - -if [[ -n "$output_file" ]]; then - cp "$report_file" "$output_file" - printf 'OK: wrote L2 incident triage report to %s\n' "$output_file" -else - cat "$report_file" -fi