This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env bash
# APT cleanup script preamble: shell options and default settings.

# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Log file that later receives a copy of the script output.
LOG_FILE="/var/log/ailab-apt-cleanup.log"

# Command-line switches, both off until the argument parser enables them.
non_interactive=false
execute=false
|
||||
|
||||
# Print the one-line usage synopsis, prefixed with this script's base name,
# to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")"
  printf 'Usage: %s [--execute [--non-interactive]]\n' "$script_name"
}
|
||||
|
||||
# Parse command-line flags; anything unrecognised is a usage error (exit 2).
while (($# > 0)); do
  case "$1" in
    --execute) execute=true ;;
    --non-interactive) non_interactive=true ;;
    -h|--help) usage; exit 0 ;;
    *) printf 'CRITICAL: unknown argument: %s\n' "$1" >&2; usage >&2; exit 2 ;;
  esac
  shift
done

# --non-interactive only skips the confirmation prompt, so it is rejected
# unless --execute is given as well.
if [[ "$non_interactive" == true && "$execute" != true ]]; then
  printf 'CRITICAL: --non-interactive requires --execute\n' >&2
  exit 2
fi
if ((EUID != 0)); then
  printf 'CRITICAL: this script must run as root\n' >&2
  exit 2
fi
# apt-get is used instead of apt: apt itself warns that its command-line
# interface is not stable and should be used with caution in scripts.
if ! command -v apt-get >/dev/null 2>&1; then
  printf 'CRITICAL: apt-get is required\n' >&2
  exit 2
fi

# From here on, stdout and stderr are both copied into the log file.
exec > >(tee -a "$LOG_FILE") 2>&1
printf '\n[%s] APT cleanup\n' "$(date --iso-8601=seconds)"

# Dry run (the default): show a simulated autoremove and stop.
if [[ "$execute" != true ]]; then
  printf 'INFO: dry-run mode; apt update, autoremove, autoclean, and needrestart are not executed\n'
  printf 'INFO: simulated autoremove follows\n'
  LC_ALL=C apt-get -s autoremove --purge
  printf 'INFO: rerun with --execute and confirm to apply changes\n'
  exit 0
fi

# Interactive runs must type the literal word EXECUTE before anything changes.
if [[ "$non_interactive" != true ]]; then
  printf 'WARNING: this will update APT metadata and remove packages marked as automatically installed and unused.\n'
  printf 'Type EXECUTE to continue: '
  # read returns non-zero at end-of-input (closed or empty stdin). Without
  # the guard errexit would end the script silently; with it, an empty answer
  # falls through to the explicit "confirmation failed" message below.
  confirmation=""
  read -r confirmation || true
  if [[ "$confirmation" != "EXECUTE" ]]; then
    printf 'CRITICAL: confirmation failed; no changes made\n'
    exit 2
  fi
fi

apt-get update
apt-get autoremove --purge -y
apt-get autoclean -y

# needrestart is optional and its failure must not fail the cleanup.
if command -v needrestart >/dev/null 2>&1; then
  needrestart -b || true
else
  printf 'WARNING: needrestart is not installed\n'
fi
printf 'OK: APT cleanup completed\n'
|
||||
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env bash
# Configuration backup script preamble: shell options and default settings.

# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Log file that later receives a copy of the script output.
LOG_FILE="/var/log/ailab-config-backup.log"
# Directory that holds the generated archives.
BACKUP_DIR="/srv/backups/ailab-config"
# Age in days used for the retention cleanup of old archives.
RETENTION_DAYS=30

# Command-line switches, both off until the argument parser enables them.
non_interactive=false
execute=false
|
||||
|
||||
# Print the one-line usage synopsis, prefixed with this script's base name,
# to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")"
  printf 'Usage: %s [--execute [--non-interactive]]\n' "$script_name"
}
|
||||
|
||||
# Parse command-line flags; anything unrecognised is a usage error (exit 2).
while (($# > 0)); do
  case "$1" in
    --execute) execute=true ;;
    --non-interactive) non_interactive=true ;;
    -h|--help) usage; exit 0 ;;
    *) printf 'CRITICAL: unknown argument: %s\n' "$1" >&2; usage >&2; exit 2 ;;
  esac
  shift
done

# --non-interactive only skips the confirmation prompt, so it is rejected
# unless --execute is given as well.
if [[ "$non_interactive" == true && "$execute" != true ]]; then
  printf 'CRITICAL: --non-interactive requires --execute\n' >&2
  exit 2
fi
if ((EUID != 0)); then
  printf 'CRITICAL: this script must run as root\n' >&2
  exit 2
fi
for command_name in tar gzip find; do
  if ! command -v "$command_name" >/dev/null 2>&1; then
    printf 'CRITICAL: required command is missing: %s\n' "$command_name" >&2
    exit 2
  fi
done

# From here on, stdout and stderr are both copied into the log file.
exec > >(tee -a "$LOG_FILE") 2>&1
timestamp="$(date '+%Y%m%d-%H%M%S')"
archive="$BACKUP_DIR/ailab-config-$timestamp.tar.gz"
candidate_paths=(
  /etc
  /root/.bashrc
  /root/.bashrc.d
  /opt/ailab-maintenance
  /var/lib/libvirt/qemu
)
source_paths=()

printf '\n[%s] Configuration backup\n' "$(date --iso-8601=seconds)"
# Keep only the candidates that exist. The leading slash is stripped because
# tar is later run with --directory / and archives relative member names.
for path in "${candidate_paths[@]}"; do
  if [[ -e "$path" ]]; then
    source_paths+=("${path#/}")
    printf 'OK: include %s\n' "$path"
  else
    printf 'INFO: optional path is absent: %s\n' "$path"
  fi
done

if ((${#source_paths[@]} == 0)); then
  printf 'CRITICAL: no backup source paths are present\n'
  exit 1
fi

printf 'Backup destination: %s\n' "$archive"
printf 'Retention: matching archives older than %d days\n' "$RETENTION_DAYS"
printf 'Configuration beneath /etc includes libvirt, Docker, and systemd when present\n'
printf 'Excluded by policy: Docker data, application data, model data, and VM disk images\n'

# Dry run (the default): report what would happen and stop.
if [[ "$execute" != true ]]; then
  printf 'INFO: dry-run mode; no archive or directory was created and no retention deletion ran\n'
  exit 0
fi

# Interactive runs must type the literal word EXECUTE before anything changes.
if [[ "$non_interactive" != true ]]; then
  printf 'Type EXECUTE to create the archive and apply retention: '
  # read returns non-zero at end-of-input (closed or empty stdin). Without
  # the guard errexit would end the script silently; with it, an empty answer
  # falls through to the explicit "confirmation failed" message below.
  confirmation=""
  read -r confirmation || true
  if [[ "$confirmation" != "EXECUTE" ]]; then
    printf 'CRITICAL: confirmation failed; no changes made\n'
    exit 2
  fi
fi

# The archive holds a copy of /etc and root's shell configuration, which may
# include sensitive files, so nothing created below may be readable by
# group or others.
umask 077
install -d -m 0750 "$BACKUP_DIR"

# Write to a temporary name first so that a failed run never leaves a
# truncated file that looks like a valid backup. The ".partial" suffix does
# not match the retention pattern below.
partial_archive="$archive.partial"
trap 'rm -f -- "$partial_archive"' EXIT

# GNU tar exits with status 1 when files changed while they were being
# archived, which is expected on a live system; only higher statuses are
# treated as fatal.
tar_status=0
tar --create --gzip --file "$partial_archive" --ignore-failed-read --directory / -- "${source_paths[@]}" \
  || tar_status=$?
if ((tar_status > 1)); then
  printf 'CRITICAL: tar failed with exit status %d; no archive was kept\n' "$tar_status"
  exit 1
elif ((tar_status == 1)); then
  printf 'WARNING: some files changed while being archived; the archive may not be an exact copy\n'
fi
mv -- "$partial_archive" "$archive"
trap - EXIT

# Retention runs only after a new archive is in place.
find "$BACKUP_DIR" -maxdepth 1 -type f -name 'ailab-config-*.tar.gz' -mtime "+$RETENTION_DAYS" -print -delete
printf 'OK: configuration backup created: %s\n' "$archive"
|
||||
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
# Disk usage watch script preamble: shell options and settings.

# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Log file that later receives a copy of the script output.
LOG_FILE="/var/log/ailab-disk-watch.log"

# Usage threshold; taken from AILAB_DISK_THRESHOLD when that is set and
# non-empty, otherwise 85.
threshold="${AILAB_DISK_THRESHOLD:-85}"
|
||||
|
||||
# Root is required because the log file lives under /var/log.
if ((EUID != 0)); then
  printf 'CRITICAL: this script must run as root to write %s\n' "$LOG_FILE" >&2
  exit 2
fi

# Validate the threshold. The value is forced to base 10 before any
# arithmetic: bash reads a leading zero as octal, so "08" raises an arithmetic
# error (which evaluates as false and would slip through a range check) and
# "010" would silently mean 8. The regex caps the significant digits at three
# so the conversion cannot overflow.
if [[ ! "$threshold" =~ ^0*[0-9]{1,3}$ ]]; then
  printf 'CRITICAL: AILAB_DISK_THRESHOLD must be an integer from 1 to 100\n' >&2
  exit 2
fi
threshold=$((10#$threshold))
if ((threshold < 1 || threshold > 100)); then
  printf 'CRITICAL: AILAB_DISK_THRESHOLD must be an integer from 1 to 100\n' >&2
  exit 2
fi

# From here on, stdout and stderr are both copied into the log file.
exec > >(tee -a "$LOG_FILE") 2>&1
printf '\n[%s] Disk usage check; threshold=%s%%\n' "$(date --iso-8601=seconds)" "$threshold"

# Exit status: 0 when every filesystem is below the threshold, 1 when any
# filesystem is at/above it or its usage could not be parsed.
status=0
# mountpoint is the last read variable, so it receives the remainder of the
# line and mount points containing spaces stay intact.
# NOTE(review): a source name containing whitespace would still shift the
# columns; such a line ends up in the "unable to parse" branch.
while read -r filesystem _blocks _used available use_percent mountpoint; do
  used_percent="${use_percent%\%}"

  if [[ ! "$used_percent" =~ ^[0-9]+$ ]]; then
    printf 'WARNING: unable to parse usage for %s mounted on %s\n' "$filesystem" "$mountpoint"
    status=1
  elif ((used_percent >= threshold)); then
    printf 'WARNING: %s mounted on %s is %s used; threshold=%s%%; available=%s KB\n' \
      "$filesystem" "$mountpoint" "$use_percent" "$threshold" "$available"
    status=1
  else
    printf 'OK: %s mounted on %s is %s used\n' "$filesystem" "$mountpoint" "$use_percent"
  fi
# -k pins the block size to 1 KiB so the "KB" label above is always accurate;
# awk only drops the header line and passes whole lines through.
done < <(df -Pk -x tmpfs -x devtmpfs | awk 'NR > 1')

exit "$status"
|
||||
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
# Docker cleanup script preamble: shell options and default settings.

# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Log file that later receives a copy of the script output.
LOG_FILE="/var/log/ailab-docker-cleanup.log"

# Command-line switches, both off until the argument parser enables them.
non_interactive=false
execute=false
|
||||
|
||||
# Print the one-line usage synopsis, prefixed with this script's base name,
# to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")"
  printf 'Usage: %s [--execute [--non-interactive]]\n' "$script_name"
}
|
||||
|
||||
# Parse command-line flags; anything unrecognised is a usage error (exit 2).
while (($# > 0)); do
  case "$1" in
    --execute) execute=true ;;
    --non-interactive) non_interactive=true ;;
    -h|--help) usage; exit 0 ;;
    *) printf 'CRITICAL: unknown argument: %s\n' "$1" >&2; usage >&2; exit 2 ;;
  esac
  shift
done

# --non-interactive only skips the confirmation prompt, so it is rejected
# unless --execute is given as well.
if [[ "$non_interactive" == true && "$execute" != true ]]; then
  printf 'CRITICAL: --non-interactive requires --execute\n' >&2
  exit 2
fi
if ((EUID != 0)); then
  printf 'CRITICAL: this script must run as root\n' >&2
  exit 2
fi

# From here on, stdout and stderr are both copied into the log file.
exec > >(tee -a "$LOG_FILE") 2>&1
printf '\n[%s] Docker cleanup\n' "$(date --iso-8601=seconds)"

# Nothing to do on hosts without Docker or with the service stopped.
if ! command -v docker >/dev/null 2>&1; then
  printf 'INFO: Docker is not installed; nothing to do\n'
  exit 0
fi
if command -v systemctl >/dev/null 2>&1 && ! systemctl is-active --quiet docker; then
  printf 'INFO: docker.service is inactive; nothing to do\n'
  exit 0
fi

printf '\nDocker disk usage before cleanup:\n'
docker system df

# Dry run (the default): list the commands that would run and stop.
if [[ "$execute" != true ]]; then
  printf 'INFO: dry-run mode; would run docker container prune -f, docker network prune -f, and docker image prune -af\n'
  printf 'INFO: dry-run mode; would run docker builder prune -af --filter until=168h\n'
  printf 'INFO: Docker volumes are never included in this cleanup\n'
  exit 0
fi

# Interactive runs must type the literal word EXECUTE before anything changes.
if [[ "$non_interactive" != true ]]; then
  printf 'WARNING: this removes unused containers, networks, images, and old build cache, but not volumes.\n'
  printf 'Type EXECUTE to continue: '
  # read returns non-zero at end-of-input (closed or empty stdin). Without
  # the guard errexit would end the script silently; with it, an empty answer
  # falls through to the explicit "confirmation failed" message below.
  confirmation=""
  read -r confirmation || true
  if [[ "$confirmation" != "EXECUTE" ]]; then
    printf 'CRITICAL: confirmation failed; no changes made\n'
    exit 2
  fi
fi

# Containers, networks and images are pruned individually rather than with
# "docker system prune -af": with -a that command also removes ALL build
# cache, which would make the age filter on the builder prune below a no-op
# and contradict the "old build cache" promise in the warning above.
docker container prune -f
docker network prune -f
docker image prune -af
# Only build cache older than 168 hours (7 days) is removed.
docker builder prune -af --filter "until=168h"

printf '\nDocker disk usage after cleanup:\n'
docker system df
printf 'OK: Docker cleanup completed; volumes were not pruned\n'
|
||||
+111
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env bash
# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail
|
||||
|
||||
# Print a section banner: an empty line followed by "== <title> ==".
# Arguments: $1 - section title
section() {
  local title="$1"
  printf '\n== %s ==\n' "$title"
}
|
||||
|
||||
# Run a command whose failure must not stop the script.
# Arguments: $1   - human-readable description used in the warning
#            $2.. - the command and its arguments
# Outputs:   the command's own output; on failure also a
#            "WARNING: <description> failed" line on stdout
# Returns:   always 0
run_optional() {
  local label="$1"
  shift

  if ! "$@"; then
    printf 'WARNING: %s failed\n' "$label"
  fi
  return 0
}
|
||||
|
||||
# Read-only host report. Every probe goes through run_optional, so a failing
# or missing tool produces a warning line instead of aborting the report.

section "Host identity"
if command -v hostnamectl >/dev/null 2>&1; then
  run_optional "hostnamectl" hostnamectl
else
  run_optional "hostname" hostname
fi
run_optional "kernel information" uname -a
run_optional "uptime" uptime

section "Memory"
if command -v free >/dev/null 2>&1; then
  run_optional "memory report" free -h
else
  printf 'WARNING: free is not available\n'
fi

section "Filesystems"
if command -v df >/dev/null 2>&1; then
  run_optional "filesystem report" df -hT
  printf '\nKey mountpoints present:\n'
  if command -v findmnt >/dev/null 2>&1; then
    for mountpoint in / /boot /var /srv /opt /home; do
      # --mountpoint matches only paths that are themselves mount points.
      # --target would also succeed for any plain directory by resolving it
      # to the filesystem that contains it, so every existing path would be
      # listed here.
      if findmnt -rn --mountpoint "$mountpoint" >/dev/null 2>&1; then
        run_optional "filesystem report for $mountpoint" df -hT "$mountpoint"
      fi
    done
  else
    printf 'WARNING: findmnt is not available\n'
  fi
else
  printf 'WARNING: df is not available\n'
fi

section "Journal usage"
if command -v journalctl >/dev/null 2>&1; then
  run_optional "journal disk usage" journalctl --disk-usage
else
  printf 'WARNING: journalctl is not available\n'
fi

section "Docker"
if command -v docker >/dev/null 2>&1; then
  if command -v systemctl >/dev/null 2>&1; then
    run_optional "Docker service state" systemctl is-active docker
  fi
  run_optional "Docker container list" docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}'
  run_optional "Docker disk usage" docker system df
else
  printf 'INFO: Docker is not installed\n'
fi

section "Libvirt"
if command -v virsh >/dev/null 2>&1; then
  if command -v systemctl >/dev/null 2>&1; then
    run_optional "libvirtd service state" systemctl is-active libvirtd
  fi
  run_optional "libvirt guest list" virsh list --all
else
  printf 'INFO: virsh is not installed\n'
fi

section "NVIDIA"
if command -v nvidia-smi >/dev/null 2>&1; then
  run_optional "NVIDIA status" nvidia-smi
else
  printf 'INFO: nvidia-smi is not installed\n'
fi

section "Failed systemd units"
if command -v systemctl >/dev/null 2>&1; then
  run_optional "failed systemd unit report" systemctl --failed --no-pager
else
  printf 'WARNING: systemctl is not available\n'
fi

section "SMART quick health"
if command -v smartctl >/dev/null 2>&1; then
  # nullglob makes unmatched patterns expand to nothing instead of staying
  # as literal strings; it is switched off again right after the expansion.
  shopt -s nullglob
  devices=(/dev/sd? /dev/nvme?n?)
  shopt -u nullglob

  if ((${#devices[@]} == 0)); then
    printf 'INFO: no matching SATA/SCSI or NVMe devices found\n'
  else
    for device in "${devices[@]}"; do
      printf '\n-- %s --\n' "$device"
      run_optional "SMART health check for $device" smartctl -H "$device"
    done
  fi
else
  printf 'INFO: smartctl is not installed\n'
fi

exit 0
|
||||
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env bash
# Kernel cleanup script preamble: shell options and default settings.

# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Design note: package selection is left to APT autoremove, which respects
# package dependencies and kernel protection rules. That is safer than
# name-based purging on HWE hosts using NVIDIA, DKMS, or VFIO.

# Log file that later receives a copy of the script output.
LOG_FILE="/var/log/ailab-kernel-cleanup.log"

# Command-line switches, both off until the argument parser enables them.
non_interactive=false
execute=false
|
||||
|
||||
# Print the one-line usage synopsis, prefixed with this script's base name,
# to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")"
  printf 'Usage: %s [--execute [--non-interactive]]\n' "$script_name"
}
|
||||
|
||||
# List installed kernel-related packages (image, headers, modules), one per
# line, sorted and de-duplicated.
# Outputs: names of packages whose dpkg status abbreviation starts with "ii"
# Returns: always 0; dpkg-query errors are silenced and yield an empty list
kernel_packages() {
  # The ${...} placeholders are dpkg-query format fields, not shell variables,
  # hence the single quotes.
  # shellcheck disable=SC2016
  local -r query_format='${db:Status-Abbrev} ${binary:Package}\n'
  local -a name_patterns=('linux-image*' 'linux-headers*' 'linux-modules*')

  {
    dpkg-query -W -f="$query_format" "${name_patterns[@]}" 2>/dev/null \
      | awk '$1 ~ /^ii/ {print $2}' \
      | sort -u
  } || true
}
|
||||
|
||||
# List installed kernel image packages whose name continues with a digit
# after "linux-image-", one per line, sorted and de-duplicated.
# Outputs: package names with any ":<suffix>" part removed
# Returns: always 0; dpkg-query errors are silenced and yield an empty list
versioned_kernel_images() {
  # The ${...} placeholders are dpkg-query format fields, not shell variables,
  # hence the single quotes.
  # shellcheck disable=SC2016
  local -r query_format='${db:Status-Abbrev} ${binary:Package}\n'

  {
    dpkg-query -W -f="$query_format" 'linux-image-[0-9]*' 2>/dev/null \
      | awk '$1 ~ /^ii/ {sub(/:.*/, "", $2); print $2}' \
      | sort -u
  } || true
}
|
||||
|
||||
# Parse command-line flags; anything unrecognised is a usage error (exit 2).
while (($# > 0)); do
  case "$1" in
    --execute) execute=true ;;
    --non-interactive) non_interactive=true ;;
    -h|--help) usage; exit 0 ;;
    *) printf 'CRITICAL: unknown argument: %s\n' "$1" >&2; usage >&2; exit 2 ;;
  esac
  shift
done

# --non-interactive only skips the confirmation prompt, so it is rejected
# unless --execute is given as well.
if [[ "$non_interactive" == true && "$execute" != true ]]; then
  printf 'CRITICAL: --non-interactive requires --execute\n' >&2
  exit 2
fi
if ((EUID != 0)); then
  printf 'CRITICAL: this script must run as root\n' >&2
  exit 2
fi
# apt-get is used instead of apt: the safety check below parses the
# simulation output, and apt itself warns that its command-line interface is
# not stable for use in scripts.
for command_name in apt-get dpkg-query uname; do
  if ! command -v "$command_name" >/dev/null 2>&1; then
    printf 'CRITICAL: required command is missing: %s\n' "$command_name" >&2
    exit 2
  fi
done

# From here on, stdout and stderr are both copied into the log file.
exec > >(tee -a "$LOG_FILE") 2>&1
printf '\n[%s] Kernel cleanup\n' "$(date --iso-8601=seconds)"
printf 'Running kernel: %s\n' "$(uname -r)"
printf '\nInstalled kernel-related packages before cleanup:\n'
kernel_packages

# The simulation is always run; its "Remv" lines drive the safety checks.
simulation="$(LC_ALL=C apt-get -s autoremove --purge)"
printf '\nAPT autoremove simulation:\n%s\n' "$simulation"

mapfile -t installed_images < <(versioned_kernel_images)
# Versioned kernel images the simulation would remove, with any ":<suffix>"
# stripped so the names compare equal to the installed list.
mapfile -t removed_images < <(
  awk '$1 == "Remv" && $2 ~ /^linux-image-[0-9]/ {sub(/:.*/, "", $2); print $2}' <<<"$simulation" | sort -u
)

# Count the installed images that are not in the removal list.
remaining_images=0
for image in "${installed_images[@]}"; do
  remove_image=false
  for removed in "${removed_images[@]}"; do
    if [[ "$image" == "$removed" ]]; then
      remove_image=true
      break
    fi
  done
  if [[ "$remove_image" != true ]]; then
    remaining_images=$((remaining_images + 1))
  fi
done

printf 'Kernel image safety check: installed=%d simulated-removals=%d remaining=%d\n' \
  "${#installed_images[@]}" "${#removed_images[@]}" "$remaining_images"

if ((${#installed_images[@]} < 2 || remaining_images < 2)); then
  printf 'CRITICAL: cleanup would not leave at least two versioned kernel images; refusing execution\n'
  exit 1
fi

# Defence in depth: refuse outright if the simulation lists the image package
# of the kernel that is currently running.
running_image="linux-image-$(uname -r)"
for removed in "${removed_images[@]}"; do
  if [[ "$removed" == "$running_image" ]]; then
    printf 'CRITICAL: cleanup would remove the running kernel image %s; refusing execution\n' "$running_image"
    exit 1
  fi
done

# Dry run (the default): stop after the simulation and safety checks.
if [[ "$execute" != true ]]; then
  printf 'INFO: dry-run mode; no packages were removed\n'
  printf 'INFO: rerun with --execute and confirm to apply the simulated cleanup\n'
  exit 0
fi

# Interactive runs must type the literal word EXECUTE before anything changes.
if [[ "$non_interactive" != true ]]; then
  printf 'WARNING: APT will remove the packages shown in the simulation above.\n'
  printf 'Type EXECUTE to continue: '
  # read returns non-zero at end-of-input (closed or empty stdin). Without
  # the guard errexit would end the script silently; with it, an empty answer
  # falls through to the explicit "confirmation failed" message below.
  confirmation=""
  read -r confirmation || true
  if [[ "$confirmation" != "EXECUTE" ]]; then
    printf 'CRITICAL: confirmation failed; no changes made\n'
    exit 2
  fi
fi

apt-get autoremove --purge -y
apt-get autoclean -y
# Refreshing the boot menu is best effort; a failure does not fail the run.
if command -v update-grub >/dev/null 2>&1; then
  update-grub || true
else
  printf 'WARNING: update-grub is not installed\n'
fi

printf '\nInstalled kernel-related packages after cleanup:\n'
kernel_packages
printf 'OK: kernel cleanup completed with APT-managed package selection\n'
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env bash
# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail
|
||||
|
||||
# Print a section banner: an empty line followed by "== <title> ==".
# Arguments: $1 - section title
section() {
  local title="$1"
  printf '\n== %s ==\n' "$title"
}
|
||||
|
||||
# Read-only libvirt audit: lists guests, storage pools and their volumes, and
# searches a few directories for disk and installation image files.

# Without virsh there is nothing to audit; this is not treated as an error.
if ! command -v virsh >/dev/null 2>&1; then
  printf 'INFO: virsh is not installed; VM audit skipped\n'
  exit 0
fi

section "Virtual machines"
if ! virsh list --all; then
  printf 'WARNING: unable to list virtual machines\n'
fi

section "Storage pools"
if ! virsh pool-list --all; then
  printf 'WARNING: unable to list storage pools\n'
fi

# Collect pool names one per line, dropping blank lines; errors from virsh
# are silenced and leave the list empty.
mapfile -t pools < <(
  virsh pool-list --all --name 2>/dev/null | sed '/^[[:space:]]*$/d' || true
)
for pool in "${pools[@]}"; do
  section "Volumes in pool: $pool"
  if ! virsh vol-list "$pool"; then
    printf 'WARNING: unable to list volumes in pool %s\n' "$pool"
  fi
done

section "Possible VM disk and installation images"
# Search only the candidate directories that actually exist.
search_roots=()
for path in /var/lib/libvirt /srv /opt; do
  if [[ -d "$path" ]]; then
    search_roots+=("$path")
  fi
done

if ((${#search_roots[@]} > 0)); then
  # -xdev keeps find on the filesystem of each search root. Output is
  # "<size> bytes <path>", largest first; find errors are discarded.
  find "${search_roots[@]}" -xdev -type f \
    \( -iname '*.qcow2' -o -iname '*.raw' -o -iname '*.iso' \) \
    -printf '%12s bytes %p\n' 2>/dev/null \
    | sort -nr || true
else
  printf 'INFO: no configured search roots are present\n'
fi

printf '\nINFO: audit complete; no files or libvirt resources were modified\n'
|
||||
Reference in New Issue
Block a user