Add AI lab maintenance toolkit
lint / shell-yaml-ansible (push) Failing after 17s

This commit is contained in:
Mateusz Suski
2026-06-06 00:10:44 +00:00
parent 1843796e92
commit 8cb92de06f
21 changed files with 1031 additions and 0 deletions
+103
View File
@@ -0,0 +1,103 @@
#!/usr/bin/env bash
#
# AI Lab maintenance toolkit installer.
#
# Installs the maintenance packages, copies the bundled scripts and systemd
# units into place, applies journald and Docker log limits, enables the
# ailab-* timers and writes an initial health report.
# Usage: run as root from a checkout that contains scripts/ and systemd/.

# Strict mode: abort on errors, on unset variables and on failed pipelines.
set -o errexit -o nounset -o pipefail

# Directory holding this script; scripts/ and systemd/ are resolved from it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Configuration files managed by the installer.
JOURNALD_DROP_IN="/etc/systemd/journald.conf.d/ailab-limits.conf"
DOCKER_CONFIG="/etc/docker/daemon.json"

# Packages installed through apt-get.
packages=(
  logrotate needrestart
  smartmontools nvme-cli
  sysstat iotop
  ncdu duf
  jq
  lsof psmisc
  tar gzip
)

# systemd timers enabled (and started) at the end of the installation.
timers=(
  ailab-apt-cleanup.timer
  ailab-kernel-cleanup.timer
  ailab-docker-cleanup.timer
  ailab-config-backup.timer
  ailab-disk-watch.timer
)
# Preflight: the installer writes under /etc, /usr/local/sbin and /root, so it
# refuses to continue unless it runs as root.
if [[ "$EUID" -ne 0 ]]; then
  printf 'CRITICAL: install.sh must run as root\n' >&2
  exit 2
fi

# Preflight: stop at the first external tool that cannot be found on PATH.
for required_tool in apt-get install systemctl; do
  command -v "$required_tool" >/dev/null 2>&1 && continue
  printf 'CRITICAL: required command is missing: %s\n' "$required_tool" >&2
  exit 2
done
# Refresh the package index, then install every entry of the packages array
# without interactive prompts.
printf 'Installing maintenance dependencies...\n'
apt-get update
DEBIAN_FRONTEND=noninteractive apt-get install -y "${packages[@]}"

printf 'Installing scripts and systemd units...\n'

# Bundled helper scripts become root-owned executables in /usr/local/sbin.
for script_src in "$SCRIPT_DIR"/scripts/*.sh; do
  install -m 0755 "$script_src" "/usr/local/sbin/${script_src##*/}"
done

# Service units first, then timer units, are copied as plain 0644 files.
for unit_src in "$SCRIPT_DIR"/systemd/*.service "$SCRIPT_DIR"/systemd/*.timer; do
  install -m 0644 "$unit_src" "/etc/systemd/system/${unit_src##*/}"
done
# Make sure the journald drop-in directory exists.
install -d -m 0755 "${JOURNALD_DROP_IN%/*}"

# Stage the drop-in in a temporary file. The EXIT trap removes it on every
# exit path; tmp_docker is only assigned later when Docker is present, hence
# the ${tmp_docker:-} default so the trap stays safe under nounset.
tmp_journald="$(mktemp)"
trap 'rm -f "$tmp_journald" "${tmp_docker:-}"' EXIT

# Journal size, free-space and retention limits (see journald.conf(5)).
printf '%s\n' \
  '[Journal]' \
  'SystemMaxUse=1G' \
  'SystemKeepFree=2G' \
  'MaxRetentionSec=14day' \
  'Compress=yes' >"$tmp_journald"

# Publish the drop-in and restart journald so the new limits are loaded.
install -m 0644 "$tmp_journald" "$JOURNALD_DROP_IN"
systemctl restart systemd-journald
#######################################
# Print a Docker daemon configuration with json-file log rotation enforced.
# Arguments: $1 - optional path to an existing daemon.json to merge into
# Outputs:   the merged JSON object on stdout
# Returns:   non-zero (from jq) unless the input is empty/whitespace-only
#            or consists of exactly one JSON object
#######################################
render_docker_log_config() {
  local existing="${1:-}"
  # Without a usable file, read /dev/null so the slurped input is [].
  [[ -n "$existing" && -f "$existing" ]] || existing=/dev/null
  # --slurp wraps all documents in one array: [] for an empty file, [obj] for
  # a normal one. That lets an empty daemon.json be treated as {} instead of
  # silently producing no output at all.
  jq --slurp '
    (if length == 0 then {} else .[0] end) as $current
    | if length > 1 or ($current | type) != "object" then
        error("expected a single JSON object")
      else
        $current + {
          "log-driver": "json-file",
          "log-opts": ((($current | ."log-opts") // {}) + {"max-size": "50m", "max-file": "5"})
        }
      end
  ' "$existing"
}

# Configure Docker log rotation only when the docker CLI is installed.
if command -v docker >/dev/null 2>&1; then
  printf 'Configuring Docker log rotation limits...\n'
  install -d -m 0755 /etc/docker
  tmp_docker="$(mktemp)"
  if [[ -f "$DOCKER_CONFIG" ]]; then
    # Render into the temp file first: a broken or non-object config is
    # rejected before the live file is backed up or replaced.
    if ! render_docker_log_config "$DOCKER_CONFIG" >"$tmp_docker"; then
      printf 'CRITICAL: %s is not a valid JSON object; refusing to overwrite it\n' "$DOCKER_CONFIG" >&2
      exit 1
    fi
    # Keep a timestamped copy of the previous configuration.
    backup="$DOCKER_CONFIG.$(date '+%Y%m%d-%H%M%S').bak"
    install -m 0644 "$DOCKER_CONFIG" "$backup"
  else
    render_docker_log_config >"$tmp_docker"
  fi
  # Final guard: never install anything that is not a JSON object (an empty
  # file would pass a plain `jq empty` check).
  jq -e 'type == "object"' "$tmp_docker" >/dev/null
  install -m 0644 "$tmp_docker" "$DOCKER_CONFIG"
  # Best effort: a failed restart does not abort the installation, but it is
  # reported instead of being swallowed.
  if ! systemctl restart docker; then
    printf 'WARNING: docker restart failed; new log limits in %s are not active yet\n' "$DOCKER_CONFIG" >&2
  fi
else
  printf 'INFO: Docker is not installed; Docker daemon configuration was skipped\n'
fi
# Make systemd pick up the freshly installed unit files, then enable and
# start every timer in one call.
systemctl daemon-reload
systemctl enable --now "${timers[@]}"

printf '\nEnabled AI Lab timers:\n'
# grep exits 1 when no line matches; `|| true` keeps that from aborting the
# installer under errexit/pipefail.
systemctl list-timers --all --no-pager | grep 'ailab-' || true

# Run one health check right away and keep its output as a report.
# NOTE(review): the health check script is not visible here; it presumably
# signals findings through a non-zero exit status — confirm. Capture the
# status so a failure still tells the operator where the report is, then
# exit with that same status.
health_report="/root/ailab-healthcheck-now.txt"
health_status=0
/usr/local/sbin/ailab-healthcheck.sh >"$health_report" || health_status=$?
if ((health_status != 0)); then
  printf 'WARNING: initial health check exited with status %d; see %s\n' \
    "$health_status" "$health_report" >&2
  exit "$health_status"
fi
printf '\nOK: installation complete; initial health report: %s\n' "$health_report"