Add disk full incident response toolkit
This commit is contained in:
+67
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env bash
#
# Disk-full incident runbook.
#
# Prints the step-by-step workflow and then runs the diagnostic scripts
# (01..05) that live in the same directory as this file. The emergency
# quick-fix script (06) is only listed, never run from here.

# Strict mode: abort on unhandled command failures, on use of unset
# variables, and when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

# Absolute directory of this script, so the sibling scripts resolve no matter
# what the caller's working directory is. `--` guards against a path that
# starts with a dash.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Shared environment. The helpers used further down (section, warning,
# critical, ok) and LOG_FILE are not defined in this file, so they are
# presumably provided by 00_env.sh -- confirm against that file.
# shellcheck source=00_env.sh
. "$SCRIPT_DIR/00_env.sh"

# Defaults; each can be overridden by the matching command-line option.
SEARCH_PATH="/" # --path:  root of the large-file / top-directory scans
TOP_N=20        # --top:   passed to 02_find_big_files.sh
DEPTH=2         # --depth: passed to 04_top_dirs.sh
EXECUTE=false   # --execute: only triggers a warning in this runbook

# Print the one-line command synopsis to stdout.
# Uses the script's base name (path stripped from $0) as the program name.
usage() {
  printf 'Usage: %s [--path <path>] [--top <N>] [--depth <N>] [--execute]\n' "${0##*/}"
}

# Parse command-line options into SEARCH_PATH, TOP_N, DEPTH and EXECUTE.
#
# Arguments: the script's own argument list ("$@").
# Exits:     0 after printing usage for -h/--help;
#            2 on an unknown argument or when --path/--top/--depth is given
#            without a (non-empty) value.
#
# The explicit value check matters: without it, a trailing `--path` makes
# `shift 2` fail, and under errexit the script would stop with no message.
parse_args() {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      --path | --top | --depth)
        if [[ -z "${2:-}" ]]; then
          critical "Option $1 requires a value"
          usage >&2
          exit 2
        fi
        case "$1" in
          --path) SEARCH_PATH="$2" ;;
          --top) TOP_N="$2" ;;
          --depth) DEPTH="$2" ;;
        esac
        shift 2
        ;;
      --execute)
        EXECUTE=true
        # DRY_RUN is not defined in this file; presumably it is read by the
        # helpers from 00_env.sh -- TODO confirm it reaches whoever needs it.
        DRY_RUN=false
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        critical "Unknown argument: $1"
        usage >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# Show the full incident workflow and append the same text to LOG_FILE.
# The heredoc delimiter is deliberately unquoted so that $SCRIPT_DIR,
# $SEARCH_PATH, $TOP_N and $DEPTH are expanded into the printed commands;
# the double quotes inside the heredoc are printed literally.
# Steps 6 and 7 are listed for the operator only; nothing in this file runs them.
section "Disk Full Incident Workflow"
cat <<FLOW | tee -a "$LOG_FILE"
Step 1. Disk overview
$SCRIPT_DIR/01_disk_overview.sh

Step 2. Find largest files
$SCRIPT_DIR/02_find_big_files.sh --path "$SEARCH_PATH" --top "$TOP_N"

Step 3. Check deleted but open files
$SCRIPT_DIR/03_deleted_open_files.sh

Step 4. Identify top directories
$SCRIPT_DIR/04_top_dirs.sh --path "$SEARCH_PATH" --depth "$DEPTH"

Step 5. Review safe log cleanup suggestions
$SCRIPT_DIR/05_log_cleanup.sh

Step 6. Optional emergency quick fix, only after approval
$SCRIPT_DIR/06_quick_fix.sh --truncate-file /path/to/verified.log --execute
$SCRIPT_DIR/06_quick_fix.sh --restart-service service-name --execute

Step 7. Post-check
$SCRIPT_DIR/07_postcheck.sh
FLOW

# --execute does not make this runbook perform cleanup: the only effect
# visible in this file is the warning below, telling the operator that
# destructive steps still have to be run by hand.
if [[ "$EXECUTE" == "true" ]]; then
  warning "--execute was supplied to the runbook. Destructive actions are still not run automatically."
fi

# Run diagnostic steps 1-5 in order. Each call is followed by `|| warning`
# so that a non-zero exit from one check is reported but does not trip
# errexit, and the remaining checks still run.
section "Running Read-Only Incident Checks"
"$SCRIPT_DIR/01_disk_overview.sh" \
  || warning "Disk overview reported critical usage"
"$SCRIPT_DIR/02_find_big_files.sh" --path "$SEARCH_PATH" --top "$TOP_N" \
  || warning "Large-file scan reported an issue"
"$SCRIPT_DIR/03_deleted_open_files.sh" \
  || warning "Deleted-open-file check reported an issue"
"$SCRIPT_DIR/04_top_dirs.sh" --path "$SEARCH_PATH" --depth "$DEPTH" \
  || warning "Top-directory scan reported an issue"
"$SCRIPT_DIR/05_log_cleanup.sh" \
  || warning "Log cleanup suggestion step reported an issue"

# Closing reminder: any cleanup is a separate, manually approved step.
section "Next Manual Decision"
ok "Review findings, identify owner and retention requirements, then run a targeted cleanup script with --execute only if approved."
Reference in New Issue
Block a user