From 4e739c5c995531d59bb28c3e924250554189bd22 Mon Sep 17 00:00:00 2001 From: Mateusz Suski Date: Sat, 6 Jun 2026 00:23:11 +0000 Subject: [PATCH] Add Linux fresh setup toolkit --- CHANGELOG.md | 1 + labs/README.md | 1 + labs/linux/setup/README.md | 276 ++++++++++++++++++ labs/linux/setup/docs/bash-shell.md | 53 ++++ labs/linux/setup/docs/cockpit.md | 41 +++ labs/linux/setup/docs/docker.md | 56 ++++ .../setup/docs/fresh-install-checklist.md | 47 +++ labs/linux/setup/docs/libvirt.md | 54 ++++ labs/linux/setup/docs/nvidia.md | 52 ++++ labs/linux/setup/files/bashrc.d/ailab.sh | 133 +++++++++ labs/linux/setup/files/docker/daemon.json | 7 + labs/linux/setup/files/sysctl/99-ailab.conf | 3 + .../files/systemd/journald-ailab-limits.conf | 5 + labs/linux/setup/install.sh | 182 ++++++++++++ .../linux/setup/scripts/00-platform-guard.inc | 20 ++ labs/linux/setup/scripts/00-preflight.sh | 124 ++++++++ labs/linux/setup/scripts/01-base-packages.sh | 41 +++ labs/linux/setup/scripts/02-shell-profile.sh | 60 ++++ labs/linux/setup/scripts/03-cockpit.sh | 36 +++ labs/linux/setup/scripts/04-docker.sh | 136 +++++++++ labs/linux/setup/scripts/05-libvirt.sh | 33 +++ labs/linux/setup/scripts/06-nvidia-tools.sh | 88 ++++++ labs/linux/setup/scripts/07-tuning.sh | 67 +++++ .../setup/scripts/08-security-baseline.sh | 61 ++++ labs/linux/setup/scripts/99-postcheck.sh | 69 +++++ 25 files changed, 1646 insertions(+) create mode 100644 labs/linux/setup/README.md create mode 100644 labs/linux/setup/docs/bash-shell.md create mode 100644 labs/linux/setup/docs/cockpit.md create mode 100644 labs/linux/setup/docs/docker.md create mode 100644 labs/linux/setup/docs/fresh-install-checklist.md create mode 100644 labs/linux/setup/docs/libvirt.md create mode 100644 labs/linux/setup/docs/nvidia.md create mode 100644 labs/linux/setup/files/bashrc.d/ailab.sh create mode 100644 labs/linux/setup/files/docker/daemon.json create mode 100644 labs/linux/setup/files/sysctl/99-ailab.conf create mode 100644 
labs/linux/setup/files/systemd/journald-ailab-limits.conf create mode 100755 labs/linux/setup/install.sh create mode 100644 labs/linux/setup/scripts/00-platform-guard.inc create mode 100755 labs/linux/setup/scripts/00-preflight.sh create mode 100755 labs/linux/setup/scripts/01-base-packages.sh create mode 100755 labs/linux/setup/scripts/02-shell-profile.sh create mode 100755 labs/linux/setup/scripts/03-cockpit.sh create mode 100755 labs/linux/setup/scripts/04-docker.sh create mode 100755 labs/linux/setup/scripts/05-libvirt.sh create mode 100755 labs/linux/setup/scripts/06-nvidia-tools.sh create mode 100755 labs/linux/setup/scripts/07-tuning.sh create mode 100755 labs/linux/setup/scripts/08-security-baseline.sh create mode 100755 labs/linux/setup/scripts/99-postcheck.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 73d0570..55945da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Added +- Added Linux Fresh Setup Toolkit under `labs/linux/setup` for day-0 Ubuntu lab host bootstrap automation. - Added AI Lab Maintenance Toolkit with systemd-based Linux maintenance automation. - Python tooling validation for operational scripts. - `incident-log-summary` for general incident log summarization. diff --git a/labs/README.md b/labs/README.md index 81398e9..68b71db 100644 --- a/labs/README.md +++ b/labs/README.md @@ -12,6 +12,7 @@ Current subdirectories are planning areas unless their own README documents a ru ## Linux operations labs +- [Linux Fresh Setup Toolkit](./linux/setup/) - Bootstrap automation for fresh Ubuntu lab hosts, including shell profile, Cockpit, Docker, libvirt/KVM, NVIDIA diagnostics, tuning and safe baseline defaults. - [AI Lab Maintenance Toolkit](./linux/ailab-maintenance/) - Homelab-safe Linux maintenance automation for an Ubuntu AI infrastructure host, covering cleanup, health checks, config backup, Docker hygiene, kernel safety and systemd timers. 
Lab content should document prerequisites, topology, validation, cleanup, and what remains untested. Do not present lab behavior as production-ready. diff --git a/labs/linux/setup/README.md b/labs/linux/setup/README.md new file mode 100644 index 0000000..1407933 --- /dev/null +++ b/labs/linux/setup/README.md @@ -0,0 +1,276 @@ +# Linux Fresh Setup Toolkit + +## Executive summary + +The Linux Fresh Setup Toolkit is day-0 bootstrap automation for a clean Ubuntu +lab server or workstation. It prepares a host for routine administration, +Cockpit, Docker workloads, libvirt/KVM virtual machines, optional NVIDIA +diagnostics, bounded logging, practical kernel tuning, and a conservative +security baseline. + +The scripts are modular and safe to rerun. Optional components remain optional, +UFW is not enabled without a specific flag, and an NVIDIA driver is never +installed without an explicit version. This is a portfolio and homelab +implementation, not a production-certified build standard. + +## Scope and non-goals + +The toolkit supports Ubuntu 24.04 and newer and assumes a systemd-based host +with APT package management. It is suitable for a host such as `ailab` that may +run WebODM, Open WebUI, Homepage, NVIDIA workloads, or test virtual machines. + +It does not: + +- Deploy applications, containers, or virtual machines. +- Configure GPU passthrough, VFIO bindings, bridges, or Windows guests. +- Select an NVIDIA driver automatically. +- Define a complete firewall policy or compliance baseline. +- Replace backup, monitoring, patching, or ongoing maintenance processes. +- Claim live validation against every future Ubuntu release. + +## Why this is separate from ailab-maintenance + +This project establishes a fresh host. The sibling +[AI Lab Maintenance Toolkit](../ailab-maintenance/) handles day-2 health +checks, scheduled cleanup, configuration backup, disk monitoring, and VM +inventory after a host is operating. 
+ +Keeping bootstrap and maintenance separate makes the change boundary clear: +this toolkit installs platform capabilities and baseline configuration, while +the maintenance toolkit manages recurring operational tasks. + +## Directory layout + +```text +setup/ +├── README.md +├── install.sh +├── scripts/ +│ ├── 00-preflight.sh +│ ├── 00-platform-guard.inc +│ ├── 01-base-packages.sh +│ ├── 02-shell-profile.sh +│ ├── 03-cockpit.sh +│ ├── 04-docker.sh +│ ├── 05-libvirt.sh +│ ├── 06-nvidia-tools.sh +│ ├── 07-tuning.sh +│ ├── 08-security-baseline.sh +│ └── 99-postcheck.sh +├── files/ +│ ├── bashrc.d/ailab.sh +│ ├── docker/daemon.json +│ ├── sysctl/99-ailab.conf +│ └── systemd/journald-ailab-limits.conf +└── docs/ + ├── fresh-install-checklist.md + ├── cockpit.md + ├── docker.md + ├── libvirt.md + ├── nvidia.md + └── bash-shell.md +``` + +`00-platform-guard.inc` is an internal sourced helper used by mutating +component scripts; it is not an executable profile. + +## Supported profiles and flags + +| Flag | Result | +| --- | --- | +| `--base` | Install operational CLI, diagnostic, storage, and network packages | +| `--shell` | Install the root AI lab Bash profile | +| `--cockpit` | Install and enable Cockpit | +| `--docker` | Install Docker and bounded JSON-file logging | +| `--libvirt` | Install and enable libvirt/KVM | +| `--nvidia-tools` | Install NVIDIA and OpenCL diagnostics without a driver | +| `--install-nvidia-driver VERSION` | Install diagnostics and the named Ubuntu driver package | +| `--tuning` | Apply journald, sysctl, sensor, and sysstat settings | +| `--security` | Install and enable fail2ban; install but do not enable UFW | +| `--enable-ufw` | Run security setup and explicitly enable UFW | +| `--all` | Run every standard profile without UFW enablement or driver installation | + +`--install-nvidia-driver` implies `--nvidia-tools`. `--enable-ufw` implies +`--security`. With no flags, the installer prints help and makes no changes. 
+ +## Installation examples + +Review the scripts and current host access path before execution: + +```bash +cd labs/linux/setup +./install.sh +sudo ./install.sh --base --shell +sudo ./install.sh --cockpit --docker --libvirt +sudo ./install.sh --all +``` + +Explicit high-impact options can be combined with `--all`: + +```bash +sudo ./install.sh --all --enable-ufw +sudo ./install.sh --all --install-nvidia-driver 550 +``` + +The installer runs the read-only preflight once before selected profiles and a +postcheck after all successful profile steps. + +## Fresh host workflow + +1. Patch the base Ubuntu installation and confirm console or out-of-band access. +2. Review [the fresh install checklist](docs/fresh-install-checklist.md). +3. Run `sudo ./install.sh --base --shell`. +4. Add only the platform profiles needed by the host. +5. Review service state, listening ports, storage, networking, and warnings in + the postcheck. +6. Reboot if a driver or kernel-related package requires it. +7. Capture host-specific configuration and backup requirements separately. + +## AI lab workflow + +A general AI lab host can start with: + +```bash +sudo ./install.sh --base --shell --cockpit --docker --nvidia-tools --tuning --security +``` + +This installs GPU diagnostics but leaves driver choice to the operator. Add +libvirt only when the host will run VMs. Enable UFW only after confirming SSH, +Cockpit, application, bridge, and VM networking requirements. + +## Safety model + +- Mutating profiles require root and refuse non-Ubuntu systems or Ubuntu older + than 24.04. +- Component profiles install their own direct prerequisites. +- Existing managed configuration is changed only when content differs. +- Changed root shell, Docker, journald, and sysctl files receive timestamped + backups. +- Existing valid Docker JSON is merged so unrelated settings survive. +- Invalid Docker JSON stops configuration rather than being overwritten. 
+- UFW and NVIDIA driver installation require explicit flags. +- Package and service failures are not hidden. +- Postcheck warnings report optional or inactive components without masking a + successfully completed diagnostic script. + +APT installation and service restarts are real system changes. Test first on a +disposable host and maintain a console path when changing remote access policy. + +## Bash shell profile + +The shell profile is installed as `/root/.bashrc.d/ailab.sh`, and one exact +source line is maintained in `/root/.bashrc`. It adds concise helpers for +systemd, journals, Docker, libvirt, NVIDIA, ports, archives, and disk usage. + +See [Bash shell profile](docs/bash-shell.md) for command details and cautions. + +## Cockpit setup + +Cockpit provides browser-based host, storage, network, package, VM, metrics, +and support-report views. The installer enables `cockpit.socket` and reports +`https://HOSTNAME:9090`. `cockpit-files` is optional because it is not +available in every enabled Ubuntu repository. + +See [Cockpit setup](docs/cockpit.md). + +## Docker setup + +The Ubuntu `docker.io` package path is preferred. The Docker official +repository is configured only when `docker.io` is unavailable. The daemon uses +the `json-file` log driver with five 50 MB files per container. + +The toolkit configures log retention only. It does not prune data, deploy +Compose applications, or configure an NVIDIA container runtime. + +See [Docker setup](docs/docker.md). + +## libvirt/KVM setup + +The libvirt profile installs QEMU, OVMF, software TPM support, virt-install, +virt-manager, bridge utilities, and libvirt clients and services. It enables +`libvirtd` and prints existing guests and networks. + +See [libvirt/KVM setup](docs/libvirt.md). + +## NVIDIA tooling + +The default NVIDIA profile installs `nvtop`, `clinfo`, and PCI diagnostics. +It reports detected NVIDIA devices, `nvidia-smi`, and DKMS state when those +commands exist. 
+ +Driver installation requires a numeric version that maps to an available +Ubuntu package, for example `nvidia-driver-550`. Secure Boot enrollment, +driver suitability, CUDA, container runtime support, and passthrough remain +operator decisions. + +See [NVIDIA tooling](docs/nvidia.md). + +## Tuning + +The tuning profile bounds persistent journal use, raises inotify limits for +development and container workloads, reduces swappiness, enables sysstat, and +runs automatic sensor detection when available. + +Review these values against available memory, storage, monitoring retention, +and workload behavior before deployment beyond a lab. + +## Security baseline + +The security profile installs UFW and fail2ban and enables fail2ban. It leaves +UFW disabled unless `--enable-ufw` is present. Explicit UFW enablement permits +OpenSSH and TCP port 9090 before activation. + +This is a minimal access-preservation baseline, not a complete host firewall or +hardening standard. Application and VM networking may require additional +reviewed rules. + +## Postcheck + +The final script reports: + +- Failed systemd units. +- Cockpit, Docker, libvirt, and fail2ban status when installed. +- Running Docker containers and defined virtual machines. +- NVIDIA runtime state. +- Filesystem usage and listening ports. + +Warnings require operator review but optional component absence does not cause +the postcheck itself to fail. + +## Troubleshooting + +Run individual read-only checks after correcting a failed profile: + +```bash +sudo ./scripts/00-preflight.sh +sudo ./scripts/99-postcheck.sh +systemctl --failed +journalctl -u docker -u libvirtd -u cockpit.socket -u fail2ban +``` + +Common failure areas are unavailable APT repositories, unsupported package +names on a future Ubuntu release, invalid pre-existing Docker JSON, Secure Boot +module signing, disabled CPU virtualization, and remote firewall assumptions. 
+ +To roll back a managed configuration, compare the current file with its +timestamped `.bak` copy, restore the reviewed backup, and restart or reload the +owning service. Package removal is intentionally not automated because it may +affect workloads and dependencies. + +## Interview talking points + +- Why day-0 bootstrap and day-2 maintenance have separate ownership. +- How explicit flags protect firewall and GPU driver decisions. +- Why Docker JSON is validated, backed up, and merged. +- How idempotent content checks prevent backup and restart churn. +- Why preflight and postcheck evidence surround mutating profiles. +- Which virtualization, Secure Boot, IOMMU, and GPU decisions remain manual. + +## Future improvements + +- Add automated tests using disposable Ubuntu VMs. +- Add a documented NVIDIA Container Toolkit profile. +- Add optional non-root administrative user and group membership management. +- Add bridge and VFIO planning checks without applying passthrough changes. +- Add package compatibility matrices after validating future Ubuntu releases. +- Export postcheck results in a structured format for evidence collection. diff --git a/labs/linux/setup/docs/bash-shell.md b/labs/linux/setup/docs/bash-shell.md new file mode 100644 index 0000000..6a57a85 --- /dev/null +++ b/labs/linux/setup/docs/bash-shell.md @@ -0,0 +1,53 @@ +# Bash Shell Profile + +## Installation + +The shell profile is installed for root: + +```text +/root/.bashrc.d/ailab.sh +``` + +The installer maintains one exact source line in `/root/.bashrc` and backs up +changed files. 
Start a new Bash session or run: + +```bash +source /root/.bashrc +``` + +## Aliases + +| Alias | Purpose | +| --- | --- | +| `ll`, `la` | Detailed and hidden-file directory listings | +| `ports` | Listening TCP/UDP sockets and processes | +| `dus`, `dufh` | Directory and filesystem usage | +| `failed`, `jerr`, `timers` | systemd failure, journal error, and timer views | +| `dps`, `ddf`, `dcu` | Docker containers, disk use, and Compose startup | +| `vms` | All libvirt guests | +| `gpu`, `gpuloop` | NVIDIA status once or refreshed every two seconds | +| `now` | Current timestamp and timezone | + +`dcu` runs `docker compose up -d` in the current directory and therefore may +create or start resources. Review the Compose project before using it. + +## Functions + +- `svc_status SERVICE` +- `svc_logs SERVICE [LINES]` +- `docker_logs CONTAINER [LINES]` +- `docker_restart CONTAINER` +- `vm_autostart VM` +- `vm_no_autostart VM` +- `path_backup PATH` +- `extract ARCHIVE` + +Functions validate argument counts, and Docker, libvirt, and NVIDIA helpers +report missing commands clearly. `path_backup` creates a timestamped adjacent +copy and can consume substantial space for large paths. + +## Rollback + +Review timestamped backups under `/root`, restore the desired `.bashrc` or +profile copy, and start a new shell. Avoid restoring a backup without checking +for unrelated shell changes made after bootstrap. diff --git a/labs/linux/setup/docs/cockpit.md b/labs/linux/setup/docs/cockpit.md new file mode 100644 index 0000000..7c8b25a --- /dev/null +++ b/labs/linux/setup/docs/cockpit.md @@ -0,0 +1,41 @@ +# Cockpit + +## Purpose + +The Cockpit profile installs browser-based host administration modules for +system state, storage, networking, packages, virtual machines, metrics, and +support reports. It enables the socket-activated service. 
+ +## Installation and validation + +```bash +sudo ./install.sh --cockpit +systemctl status cockpit.socket +ss -ltnp | grep ':9090' +``` + +Connect to `https://HOSTNAME:9090`. A browser warning is expected when the +default host certificate is not trusted. + +`cockpit-files` is installed when available and skipped with a warning +otherwise. + +## Access and firewall + +The Cockpit profile does not change UFW. Explicit toolkit UFW enablement allows +TCP 9090, but upstream firewalls and network ACLs remain external concerns. +Use normal Linux accounts and review which users may administer the host. + +## Troubleshooting and rollback + +```bash +journalctl -u cockpit.socket -u cockpit.service +systemctl restart cockpit.socket +apt-cache policy cockpit cockpit-machines cockpit-files +``` + +To disable remote access without removing packages: + +```bash +sudo systemctl disable --now cockpit.socket +``` diff --git a/labs/linux/setup/docs/docker.md b/labs/linux/setup/docs/docker.md new file mode 100644 index 0000000..c7655b4 --- /dev/null +++ b/labs/linux/setup/docs/docker.md @@ -0,0 +1,56 @@ +# Docker + +## Package policy + +The profile prefers Ubuntu's `docker.io` package. If that package is +unavailable after an APT refresh, it configures Docker's official Ubuntu +repository and installs Docker Engine, containerd, Buildx, and Compose plugins. + +This fallback requires network access to `download.docker.com`. + +## Daemon configuration + +The managed settings are: + +```json +{ + "log-driver": "json-file", + "log-opts": { + "max-size": "50m", + "max-file": "5" + } +} +``` + +Existing valid `/etc/docker/daemon.json` content is preserved and merged with +these log settings. A changed file is backed up with a timestamp. Invalid JSON +causes the profile to stop rather than overwrite operator configuration. + +Log limits apply to newly created containers. Existing containers may retain +their original logging configuration until recreated. 
+ +## Validation + +```bash +docker version +docker compose version +docker info +docker ps +docker system df +jq . /etc/docker/daemon.json +``` + +## Troubleshooting and rollback + +```bash +systemctl status docker +journalctl -u docker +jq empty /etc/docker/daemon.json +``` + +To restore a previous daemon configuration, review a timestamped backup, +replace the current file, validate it with `jq empty`, and restart Docker. +Do not restore blindly when workloads depend on newer daemon settings. + +The profile does not configure Docker data roots, prune objects, deploy +applications, or install the NVIDIA Container Toolkit. diff --git a/labs/linux/setup/docs/fresh-install-checklist.md b/labs/linux/setup/docs/fresh-install-checklist.md new file mode 100644 index 0000000..fc5d127 --- /dev/null +++ b/labs/linux/setup/docs/fresh-install-checklist.md @@ -0,0 +1,47 @@ +# Fresh Install Checklist + +## Before bootstrap + +- Confirm Ubuntu 24.04 or newer and record the release and kernel. +- Apply firmware settings for virtualization, IOMMU, or Secure Boot as needed. +- Confirm console or out-of-band access before firewall work. +- Record interfaces, addresses, routes, DNS, storage, and intended mountpoints. +- Patch the base system and reboot if required. +- Decide whether the host needs Docker, libvirt, Cockpit, or NVIDIA support. +- Review application ports and VM networking before enabling UFW. +- Confirm backups exist for any pre-existing host configuration. + +## Bootstrap + +Start with the least capability required: + +```bash +sudo ./install.sh --base --shell +``` + +Add reviewed platform profiles: + +```bash +sudo ./install.sh --cockpit --docker --libvirt --nvidia-tools --tuning --security +``` + +Do not select `--enable-ufw` until remote access and application rules are +understood. Do not install an NVIDIA driver until hardware, kernel, Secure Boot, +and workload compatibility are known. + +## Post-bootstrap evidence + +- Review all installer warnings. 
+- Run `systemctl --failed`. +- Confirm expected services with `systemctl status`. +- Review `ss -tulpn`, `df -hT`, `ip -brief address`, and `ip route`. +- Confirm Docker with `docker version` and `docker compose version`. +- Confirm libvirt with `virsh list --all` and `virsh net-list --all`. +- Confirm GPU state with `lspci -nn | grep -i nvidia` and `nvidia-smi`. +- Reboot after driver installation and repeat the postcheck. + +## Handover + +Document host-specific storage, network, firewall, backup, application, GPU, +and VM decisions. Install the separate `ailab-maintenance` toolkit only after +reviewing its scheduled day-2 behavior. diff --git a/labs/linux/setup/docs/libvirt.md b/labs/linux/setup/docs/libvirt.md new file mode 100644 index 0000000..1271d6c --- /dev/null +++ b/labs/linux/setup/docs/libvirt.md @@ -0,0 +1,54 @@ +# libvirt and KVM + +## Purpose + +The libvirt profile installs QEMU/KVM administration, UEFI firmware, software +TPM support, VM creation tools, bridge utilities, and the libvirt daemon. This +supports later Linux or Windows 11 VM work without defining guests. + +## Firmware pre-checks + +Confirm CPU virtualization is enabled: + +```bash +lscpu | grep -E 'Virtualization|Hypervisor' +grep -Eom1 '(vmx|svm)' /proc/cpuinfo +``` + +IOMMU and GPU passthrough require separate firmware, kernel command-line, +device isolation, driver binding, and recovery planning. This toolkit reports +hints but does not apply those changes. + +## Validation + +```bash +systemctl status libvirtd +virsh list --all +virsh net-list --all +virsh pool-list --all +``` + +Use `virt-host-validate` when available for a broader host capability report. +Desktop use of `virt-manager` requires a graphical environment or remote +display strategy. + +## Networking and Windows 11 + +The default libvirt NAT network is distinct from host bridge networking. Review +DHCP, DNS, forwarding, and firewall behavior before changing it. + +Windows 11 typically needs UEFI and a TPM device. 
The installed OVMF and swtpm +packages provide those building blocks, but guest creation and licensing remain +manual. + +## Troubleshooting + +```bash +journalctl -u libvirtd +virsh net-info default +virsh pool-list --all +lsmod | grep kvm +``` + +Disabling `libvirtd` does not remove VM disks or definitions. Package removal +and VM data deletion are intentionally outside this toolkit. diff --git a/labs/linux/setup/docs/nvidia.md b/labs/linux/setup/docs/nvidia.md new file mode 100644 index 0000000..1a059fa --- /dev/null +++ b/labs/linux/setup/docs/nvidia.md @@ -0,0 +1,52 @@ +# NVIDIA Tooling + +## Diagnostic-only default + +The normal NVIDIA profile installs `nvtop`, `clinfo`, and PCI utilities. It +does not install or select a driver: + +```bash +sudo ./install.sh --nvidia-tools +``` + +Review hardware and current module state: + +```bash +lspci -nn | grep -i nvidia +nvidia-smi +dkms status +mokutil --sb-state +``` + +## Explicit driver installation + +Install only a reviewed Ubuntu driver package version: + +```bash +sudo ./install.sh --install-nvidia-driver 550 +``` + +The numeric value maps directly to `nvidia-driver-VERSION`. The profile refuses +an unavailable package. Reboot after installation, then validate `nvidia-smi`, +kernel logs, DKMS state, and application behavior. + +## Selection considerations + +- GPU generation and supported driver branch. +- Ubuntu release, kernel, and HWE stack. +- Secure Boot module enrollment. +- CUDA or application compatibility. +- Docker NVIDIA Container Toolkit requirements. +- Whether the device will be bound to VFIO instead of the host driver. + +## Troubleshooting + +```bash +journalctl -k | grep -Ei 'nvidia|nouveau|NVRM' +lsmod | grep -E 'nvidia|nouveau' +dkms status +apt-cache policy 'nvidia-driver-*' +``` + +Driver rollback is environment-specific and is not automated. Preserve console +access and a known-good kernel before changing GPU or Secure Boot configuration. 
#!/usr/bin/env bash
# AI lab operational shell helpers. This file is intended to be sourced.
#
# Optional tooling (Docker, libvirt, NVIDIA) is probed with `command -v` at
# call time, so the profile loads cleanly on hosts without those components.

# --- Listing, disk, systemd, and time shortcuts ------------------------------
alias ll='ls -alF'
alias la='ls -A'
alias ports='ss -tulpn'
alias dus='du -xhd1 2>/dev/null | sort -h'
alias dufh='df -hT'
alias failed='systemctl --failed --no-pager'
alias jerr='journalctl -p err -b --no-pager'
alias timers='systemctl list-timers --all --no-pager'
alias now='date "+%Y-%m-%d %H:%M:%S %Z"'

# --- Guarded shortcuts for optional tooling ----------------------------------
# Shape: probe || { report; false; } && real-command
#   * The "not installed" text is printed only when the probe itself fails, so
#     a failing `docker ps` (for example, daemon stopped) keeps its own error
#     and exit status instead of being reported as a missing package.
#   * The real command is the last word list of the alias, so trailing
#     arguments typed after the alias reach it (e.g. `dps -a`).
alias dps='command -v docker >/dev/null 2>&1 || { printf "Docker is not installed\n"; false; } && docker ps --format "table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}"'
alias ddf='command -v docker >/dev/null 2>&1 || { printf "Docker is not installed\n"; false; } && docker system df'
alias dcu='command -v docker >/dev/null 2>&1 || { printf "Docker Compose is not available\n"; false; } && docker compose up -d'
alias vms='command -v virsh >/dev/null 2>&1 || { printf "virsh is not installed\n"; false; } && virsh list --all'
alias gpu='command -v nvidia-smi >/dev/null 2>&1 || { printf "nvidia-smi is not installed\n"; false; } && nvidia-smi'
alias gpuloop='command -v nvidia-smi >/dev/null 2>&1 || { printf "nvidia-smi is not installed\n"; false; } && watch -n 2 nvidia-smi'

#######################################
# Private helper: confirm a command is available.
# Arguments: $1 - command name to probe
#            $2 - optional display name for the message (defaults to $1)
# Outputs:   "<name> is not installed" on stderr when the probe fails
# Returns:   0 when available, 1 otherwise
#######################################
_ailab_require_command() {
  command -v "$1" >/dev/null 2>&1 && return 0
  printf '%s is not installed\n' "${2:-$1}" >&2
  return 1
}

#######################################
# Show systemd status for one unit without a pager.
# Arguments: $1 - unit name
# Returns:   2 on usage error, otherwise the systemctl status
#######################################
svc_status() {
  if (($# != 1)); then
    printf 'Usage: svc_status SERVICE\n' >&2
    return 2
  fi
  systemctl status "$1" --no-pager
}

#######################################
# Print the most recent journal lines for one unit.
# Arguments: $1 - unit name
#            $2 - optional line count, digits only (default 100)
# Returns:   2 on usage or validation error, otherwise the journalctl status
#######################################
svc_logs() {
  if (($# < 1 || $# > 2)); then
    printf 'Usage: svc_logs SERVICE [LINES]\n' >&2
    return 2
  fi
  local lines="${2:-100}"
  [[ "$lines" =~ ^[0-9]+$ ]] || {
    printf 'LINES must be numeric\n' >&2
    return 2
  }
  journalctl -u "$1" -n "$lines" --no-pager
}

#######################################
# Print the tail of a container log.
# Arguments: $1 - container name or ID
#            $2 - optional line count, digits only (default 100)
# Returns:   2 on usage or validation error, 1 when docker is missing,
#            otherwise the docker status
#######################################
docker_logs() {
  if (($# < 1 || $# > 2)); then
    printf 'Usage: docker_logs CONTAINER [LINES]\n' >&2
    return 2
  fi
  _ailab_require_command docker Docker || return 1
  local lines="${2:-100}"
  [[ "$lines" =~ ^[0-9]+$ ]] || {
    printf 'LINES must be numeric\n' >&2
    return 2
  }
  docker logs --tail "$lines" "$1"
}

#######################################
# Restart one container.
# Arguments: $1 - container name or ID
# Returns:   2 on usage error, 1 when docker is missing, otherwise the
#            docker status
#######################################
docker_restart() {
  if (($# != 1)); then
    printf 'Usage: docker_restart CONTAINER\n' >&2
    return 2
  fi
  _ailab_require_command docker Docker || return 1
  docker restart "$1"
}

#######################################
# Mark a libvirt guest to start with the host.
# Arguments: $1 - guest name
# Returns:   2 on usage error, 1 when virsh is missing, otherwise the
#            virsh status
#######################################
vm_autostart() {
  if (($# != 1)); then
    printf 'Usage: vm_autostart VM\n' >&2
    return 2
  fi
  _ailab_require_command virsh || return 1
  virsh autostart "$1"
}

#######################################
# Clear the autostart flag of a libvirt guest.
# Arguments: $1 - guest name
# Returns:   2 on usage error, 1 when virsh is missing, otherwise the
#            virsh status
#######################################
vm_no_autostart() {
  if (($# != 1)); then
    printf 'Usage: vm_no_autostart VM\n' >&2
    return 2
  fi
  _ailab_require_command virsh || return 1
  virsh autostart --disable "$1"
}

#######################################
# Copy a file or directory to an adjacent "<path>.<timestamp>.bak".
# Arguments: $1 - existing path to copy
# Outputs:   "Backup created: <destination>" on stdout, only after cp succeeds
# Returns:   2 on usage error; 1 when the path is missing, resolves to the
#            filesystem root, or the destination already exists; otherwise
#            the cp status
#######################################
path_backup() {
  if (($# != 1)); then
    printf 'Usage: path_backup PATH\n' >&2
    return 2
  fi
  if [[ ! -e "$1" ]]; then
    printf 'Path does not exist: %s\n' "$1" >&2
    return 1
  fi

  # Drop every trailing slash so "dir//" still yields a sibling of dir rather
  # than a destination inside the tree that is being copied.
  local base="$1"
  while [[ "$base" == */ ]]; do
    base="${base%/}"
  done
  if [[ -z "$base" ]]; then
    printf 'Refusing to back up the filesystem root: %s\n' "$1" >&2
    return 1
  fi

  local destination
  destination="${base}.$(date '+%Y%m%d-%H%M%S').bak"
  # cp would copy *into* an existing directory; never merge with an old backup.
  if [[ -e "$destination" ]]; then
    printf 'Backup destination already exists: %s\n' "$destination" >&2
    return 1
  fi

  # Report success only when the copy actually succeeded.
  cp -a -- "$1" "$destination" || return
  printf 'Backup created: %s\n' "$destination"
}

#######################################
# Unpack an archive into the current directory, choosing the tool from the
# file name suffix.
# Arguments: $1 - archive file
# Returns:   2 on usage error or unsupported suffix, 1 when the file or the
#            required tool is missing, otherwise the tool's status
#######################################
extract() {
  if (($# != 1)); then
    printf 'Usage: extract ARCHIVE\n' >&2
    return 2
  fi
  if [[ ! -f "$1" ]]; then
    printf 'Archive does not exist: %s\n' "$1" >&2
    return 1
  fi

  # A leading "-" would be parsed as an option by most of the tools below.
  local archive="$1"
  if [[ "$archive" == -* ]]; then
    archive="./$archive"
  fi

  local -a unpack
  case "$archive" in
    *.tar.bz2|*.tbz2) unpack=(tar xjf) ;;
    *.tar.gz|*.tgz) unpack=(tar xzf) ;;
    *.tar.xz|*.txz) unpack=(tar xJf) ;;
    *.tar) unpack=(tar xf) ;;
    *.bz2) unpack=(bunzip2) ;;
    *.gz) unpack=(gunzip) ;;
    *.zip) unpack=(unzip) ;;
    *.7z) unpack=(7z x) ;;
    *.rar) unpack=(unrar x) ;;
    *)
      printf 'Unsupported archive type: %s\n' "$1" >&2
      return 2
      ;;
  esac

  _ailab_require_command "${unpack[0]}" || return 1
  "${unpack[@]}" "$archive"
}
#######################################
# Abort unless the host is Ubuntu 24.04 or newer.
# Globals:   ID, VERSION_ID and every other key in the os-release file are
#            set in the calling shell, because the file is sourced
# Arguments: $1 - optional os-release path; defaults to /etc/os-release
#                 (lets the guard be exercised against fixture files)
# Outputs:   a CRITICAL diagnostic on stderr when the host is rejected
# Returns:   0 when the host is supported; otherwise exits with status 2
# Requires:  dpkg on PATH; install.sh checks for it before calling this
#######################################
require_supported_ubuntu() {
  local os_release_file="${1:-/etc/os-release}"

  # `source` searches PATH for names without a slash; force a path lookup.
  [[ "$os_release_file" == */* ]] || os_release_file="./$os_release_file"

  if [[ ! -r "$os_release_file" ]]; then
    printf 'CRITICAL: /etc/os-release is unavailable; refusing system changes\n' >&2
    exit 2
  fi

  # Only values read from the file may satisfy the checks below; an inherited
  # ID or VERSION_ID environment variable must not be able to pass the guard.
  unset ID VERSION_ID

  # shellcheck disable=SC1090
  source "$os_release_file"

  if [[ "${ID:-}" != "ubuntu" ]]; then
    printf 'CRITICAL: this toolkit supports Ubuntu only; detected %s\n' "${ID:-unknown}" >&2
    exit 2
  fi

  # dpkg's comparison orders 24.04 < 24.10 < 26.04 correctly; a missing
  # VERSION_ID is compared as 0 and therefore rejected.
  if ! dpkg --compare-versions "${VERSION_ID:-0}" ge "24.04"; then
    printf 'CRITICAL: Ubuntu 24.04 or newer is required; detected %s\n' \
      "${VERSION_ID:-unknown}" >&2
    exit 2
  fi
}
"$nvidia_driver_version" =~ ^[0-9]+$ ]]; then + printf 'CRITICAL: NVIDIA driver VERSION must contain digits only\n' >&2 + exit 2 + fi + run_nvidia=1 + shift + ;; + --tuning) + run_tuning=1 + ;; + --security) + run_security=1 + ;; + --enable-ufw) + enable_ufw=1 + run_security=1 + ;; + --all) + run_base=1 + run_shell=1 + run_cockpit=1 + run_docker=1 + run_libvirt=1 + run_nvidia=1 + run_tuning=1 + run_security=1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + printf 'CRITICAL: unknown option: %s\n\n' "$1" >&2 + usage >&2 + exit 2 + ;; + esac + shift +done + +if ((EUID != 0)); then + printf 'CRITICAL: install.sh must run as root for selected profiles\n' >&2 + exit 2 +fi + +for required_command in bash dpkg; do + if ! command -v "$required_command" >/dev/null 2>&1; then + printf 'CRITICAL: required command is missing: %s\n' "$required_command" >&2 + exit 2 + fi +done + +require_supported_ubuntu + +printf 'INFO: running read-only preflight\n' +"$SCRIPT_DIR/scripts/00-preflight.sh" + +((run_base == 0)) || "$SCRIPT_DIR/scripts/01-base-packages.sh" +((run_shell == 0)) || "$SCRIPT_DIR/scripts/02-shell-profile.sh" +((run_cockpit == 0)) || "$SCRIPT_DIR/scripts/03-cockpit.sh" +((run_docker == 0)) || "$SCRIPT_DIR/scripts/04-docker.sh" +((run_libvirt == 0)) || "$SCRIPT_DIR/scripts/05-libvirt.sh" + +if ((run_nvidia == 1)); then + if [[ -n "$nvidia_driver_version" ]]; then + "$SCRIPT_DIR/scripts/06-nvidia-tools.sh" --install-driver "$nvidia_driver_version" + else + "$SCRIPT_DIR/scripts/06-nvidia-tools.sh" + fi +fi + +((run_tuning == 0)) || "$SCRIPT_DIR/scripts/07-tuning.sh" + +if ((run_security == 1)); then + if ((enable_ufw == 1)); then + "$SCRIPT_DIR/scripts/08-security-baseline.sh" --enable-ufw + else + "$SCRIPT_DIR/scripts/08-security-baseline.sh" + fi +fi + +printf '\nINFO: running post-install checks\n' +"$SCRIPT_DIR/scripts/99-postcheck.sh" +printf '\nOK: selected Linux setup profiles completed\n' diff --git a/labs/linux/setup/scripts/00-platform-guard.inc 
# shellcheck shell=bash

#######################################
# Abort the calling script unless the host is a supported Ubuntu release.
#
# The os-release file is evaluated in a subshell, so none of its keys
# (NAME, VERSION, ID, ...) are defined in, or overwrite variables of, the
# script that sources this include. ID and VERSION_ID are cleared before
# the file is read, so values inherited from the environment cannot stand
# in for keys the file does not define.
#
# Arguments:
#   $1 - minimum accepted VERSION_ID (optional, default: 24.04)
#   $2 - os-release file to inspect (optional, default: /etc/os-release)
# Outputs:   a CRITICAL line on stderr when the host is rejected
# Returns:   0 when supported; otherwise terminates the shell with status 2
#######################################
require_supported_ubuntu() {
  local minimum_version="${1:-24.04}"
  local os_release_path="${2:-/etc/os-release}"
  local release_fields
  local os_id
  local os_version

  if [[ ! -r "$os_release_path" ]] || ! command -v dpkg >/dev/null 2>&1; then
    printf 'CRITICAL: Ubuntu release detection requires %s and dpkg\n' \
      "$os_release_path" >&2
    exit 2
  fi

  # Emit "ID<TAB>VERSION_ID"; a tab separator survives an empty VERSION_ID,
  # whereas a trailing newline would be stripped by command substitution.
  release_fields="$(
    unset ID VERSION_ID
    # shellcheck disable=SC1090
    source "$os_release_path" >/dev/null
    printf '%s\t%s' "${ID:-}" "${VERSION_ID:-}"
  )"
  os_id="${release_fields%%$'\t'*}"
  os_version="${release_fields#*$'\t'}"

  if [[ "$os_id" != "ubuntu" ]]; then
    printf 'CRITICAL: this toolkit supports Ubuntu only; detected %s\n' \
      "${os_id:-unknown}" >&2
    exit 2
  fi
  # dpkg implements Debian version ordering (24.10 > 24.04, 25.04 > 24.10).
  if ! dpkg --compare-versions "${os_version:-0}" ge "$minimum_version"; then
    printf 'CRITICAL: Ubuntu %s or newer is required; detected %s\n' \
      "$minimum_version" "${os_version:-unknown}" >&2
    exit 2
  fi
}
then + printf 'OK: CPU virtualization flags detected\n' +else + printf 'WARNING: CPU virtualization flags were not detected\n' +fi + +section "Memory" +if command -v free >/dev/null 2>&1; then + run_optional "memory report" free -h +else + run_optional "memory report" cat /proc/meminfo +fi + +section "Disks" +if command -v lsblk >/dev/null 2>&1; then + run_optional "block device report" lsblk -o NAME,TYPE,SIZE,FSTYPE,MOUNTPOINTS,MODEL +else + printf 'WARNING: lsblk is unavailable\n' +fi +run_optional "filesystem report" df -hT + +section "Network" +if command -v ip >/dev/null 2>&1; then + run_optional "network interface report" ip -brief address + run_optional "route report" ip route +else + printf 'WARNING: ip is unavailable\n' +fi + +section "Firmware and Secure Boot" +if [[ -d /sys/firmware/efi ]]; then + printf 'OK: boot mode is UEFI\n' +else + printf 'INFO: boot mode appears to be legacy BIOS\n' +fi +if command -v mokutil >/dev/null 2>&1; then + run_optional "Secure Boot report" mokutil --sb-state +else + printf 'INFO: mokutil is unavailable; Secure Boot state not queried\n' +fi + +section "IOMMU" +if [[ -r /proc/cmdline ]]; then + printf 'Kernel command line:\n' + cat /proc/cmdline + if grep -Eq '(^|[[:space:]])(intel_iommu=on|amd_iommu=on|iommu=)' /proc/cmdline; then + printf 'OK: IOMMU-related kernel arguments detected\n' + else + printf 'INFO: no explicit IOMMU kernel argument detected\n' + fi +fi +if command -v dmesg >/dev/null 2>&1; then + dmesg 2>/dev/null | grep -Ei 'DMAR|IOMMU|AMD-Vi' | tail -n 30 || \ + printf 'INFO: no readable IOMMU hints found in dmesg\n' +fi + +section "NVIDIA hardware" +if command -v lspci >/dev/null 2>&1; then + lspci -nn | grep -i nvidia || printf 'INFO: no NVIDIA PCI devices detected\n' +else + printf 'INFO: lspci is unavailable\n' +fi + +section "Existing platform components" +for command_name in docker virsh cockpit-bridge; do + if command -v "$command_name" >/dev/null 2>&1; then + printf 'OK: %s is installed at %s\n' 
"$command_name" "$(command -v "$command_name")" + else + printf 'INFO: %s is not installed\n' "$command_name" + fi +done +if command -v systemctl >/dev/null 2>&1; then + for unit in docker.service libvirtd.service cockpit.socket; do + if systemctl cat "$unit" >/dev/null 2>&1; then + state="$(systemctl is-active "$unit" 2>/dev/null || true)" + printf 'INFO: %-20s state=%s\n' "$unit" "${state:-unknown}" + else + printf 'INFO: %s is not installed\n' "$unit" + fi + done +fi + +printf '\nOK: preflight completed without modifying the host\n' diff --git a/labs/linux/setup/scripts/01-base-packages.sh b/labs/linux/setup/scripts/01-base-packages.sh new file mode 100755 index 0000000..f811c96 --- /dev/null +++ b/labs/linux/setup/scripts/01-base-packages.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -o errexit +set -o nounset +set -o pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=00-platform-guard.inc +source "$SCRIPT_DIR/00-platform-guard.inc" + +packages=( + curl wget git vim nano tmux byobu htop btop glances + jq unzip zip rsync tree ncdu duf + lsof strace tcpdump nmap dnsutils net-tools iperf3 ethtool + smartmontools nvme-cli lm-sensors pciutils usbutils hwinfo + sysstat iotop iftop nload + ca-certificates gnupg software-properties-common apt-transport-https + needrestart unattended-upgrades logrotate +) + +if ((EUID != 0)); then + printf 'CRITICAL: base package setup must run as root\n' >&2 + exit 2 +fi +require_supported_ubuntu +if ! 
#######################################
# Copy a file to a timestamped sibling before it is replaced.
#
# The copy keeps the original's mode, ownership and timestamps (cp -p), so
# backing up a private file such as a 0600 /root/.bashrc never produces a
# more permissive copy. When a backup with the same second-resolution
# timestamp already exists, a numeric suffix is added instead of
# overwriting it. The name always ends in ".bak".
#
# Arguments: $1 - path of the existing file to back up
# Outputs:   one INFO line on stdout naming the backup
# Returns:   0 on success; non-zero if the copy fails
#######################################
backup_file() {
  local path="$1"
  local stamp
  local backup
  local attempt=0

  stamp="$(date '+%Y%m%d-%H%M%S')"
  backup="${path}.${stamp}.bak"
  while [[ -e "$backup" ]]; do
    attempt=$((attempt + 1))
    backup="${path}.${stamp}-${attempt}.bak"
  done

  cp -p -- "$path" "$backup"
  printf 'INFO: backed up %s to %s\n' "$path" "$backup"
}
cmp -s "$SOURCE_FILE" "$PROFILE_FILE"; then + if [[ -f "$PROFILE_FILE" ]]; then + backup_file "$PROFILE_FILE" + fi + install -m 0644 "$SOURCE_FILE" "$PROFILE_FILE" + printf 'OK: installed %s\n' "$PROFILE_FILE" +else + printf 'OK: shell profile is already current\n' +fi + +if [[ ! -f "$BASHRC" ]]; then + install -m 0644 /dev/null "$BASHRC" +fi + +source_count="$(grep -Fxc "$SOURCE_LINE" "$BASHRC" || true)" +if [[ "$source_count" != "1" ]]; then + tmp_bashrc="$(mktemp)" + trap 'rm -f "$tmp_bashrc"' EXIT + grep -Fvx "$SOURCE_LINE" "$BASHRC" >"$tmp_bashrc" || true + printf '\n%s\n' "$SOURCE_LINE" >>"$tmp_bashrc" + backup_file "$BASHRC" + install -m 0644 "$tmp_bashrc" "$BASHRC" + printf 'OK: configured %s to source the AI lab profile exactly once\n' "$BASHRC" +else + printf 'OK: %s already sources the AI lab profile exactly once\n' "$BASHRC" +fi diff --git a/labs/linux/setup/scripts/03-cockpit.sh b/labs/linux/setup/scripts/03-cockpit.sh new file mode 100755 index 0000000..58cf331 --- /dev/null +++ b/labs/linux/setup/scripts/03-cockpit.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +set -o errexit +set -o nounset +set -o pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=00-platform-guard.inc +source "$SCRIPT_DIR/00-platform-guard.inc" + +required_packages=( + cockpit cockpit-system cockpit-storaged cockpit-networkmanager + cockpit-packagekit cockpit-machines cockpit-sosreport cockpit-pcp +) + +if ((EUID != 0)); then + printf 'CRITICAL: Cockpit setup must run as root\n' >&2 + exit 2 +fi +require_supported_ubuntu +if ! 
#######################################
# EXIT handler: delete every scratch file registered so far.
# Globals:   temporary_files (read) - paths created with mktemp
# Arguments: none
# Returns:   status of the last rm, or 0 when nothing was registered
#######################################
cleanup() {
  local index

  for ((index = 0; index < ${#temporary_files[@]}; index++)); do
    rm -f "${temporary_files[index]}"
  done
}
command -v "$command_name" >/dev/null 2>&1; then + printf 'CRITICAL: required command is missing: %s\n' "$command_name" >&2 + exit 2 + fi +done + +apt-get update +DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg jq + +if apt-cache show docker.io >/dev/null 2>&1; then + packages=(docker.io) + if apt-cache show docker-compose-v2 >/dev/null 2>&1; then + packages+=(docker-compose-v2) + else + printf 'WARNING: docker-compose-v2 is unavailable from Ubuntu repositories\n' + fi +else + printf 'WARNING: docker.io is unavailable; configuring Docker official repository\n' + install -d -m 0755 /etc/apt/keyrings + tmp_key="$(mktemp)" + temporary_files+=("$tmp_key") + curl -fsSL https://download.docker.com/linux/ubuntu/gpg \ + | gpg --dearmor --yes -o "$tmp_key" + if [[ ! -f /etc/apt/keyrings/docker.gpg ]] || \ + ! cmp -s "$tmp_key" /etc/apt/keyrings/docker.gpg; then + if [[ -f /etc/apt/keyrings/docker.gpg ]]; then + backup_file /etc/apt/keyrings/docker.gpg + fi + install -m 0644 "$tmp_key" /etc/apt/keyrings/docker.gpg + fi + + # shellcheck disable=SC1091 + source /etc/os-release + architecture="$(dpkg --print-architecture)" + tmp_repository="$(mktemp)" + temporary_files+=("$tmp_repository") + printf 'deb [arch=%s signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu %s stable\n' \ + "$architecture" "${VERSION_CODENAME:?}" \ + >"$tmp_repository" + if [[ ! -f /etc/apt/sources.list.d/docker.list ]] || \ + ! cmp -s "$tmp_repository" /etc/apt/sources.list.d/docker.list; then + if [[ -f /etc/apt/sources.list.d/docker.list ]]; then + backup_file /etc/apt/sources.list.d/docker.list + fi + install -m 0644 "$tmp_repository" /etc/apt/sources.list.d/docker.list + fi + apt-get update + packages=(docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin) +fi + +DEBIAN_FRONTEND=noninteractive apt-get install -y "${packages[@]}" +install -d -m 0755 /etc/docker + +if [[ ! 
# ---------------------------------------------------------------------------
# Render the desired daemon.json into a scratch file, then install it only
# when it differs from what is on disk.
# ---------------------------------------------------------------------------
tmp_config="$(mktemp)"
temporary_files+=("$tmp_config")
if [[ -f "$DOCKER_CONFIG" ]]; then
  if ! jq empty "$DOCKER_CONFIG" >/dev/null 2>&1; then
    printf 'CRITICAL: %s is invalid JSON; refusing to overwrite it\n' "$DOCKER_CONFIG" >&2
    exit 1
  fi
  # Slurp (-s) so an empty or whitespace-only file is seen as [] rather than
  # "no input": without this the merge would emit nothing, and an empty
  # daemon.json would be reported as current with no log limits applied.
  if ! jq -e -s 'length <= 1 and (.[0] | type | . == "object" or . == "null")' \
    "$DOCKER_CONFIG" >/dev/null 2>&1; then
    printf 'CRITICAL: %s is not a single JSON object; refusing to overwrite it\n' \
      "$DOCKER_CONFIG" >&2
    exit 1
  fi
  # Keep every existing key; force the json-file driver and overlay only the
  # two rotation options on top of any log-opts already present.
  jq -s '(.[0] // {}) | . + {
    "log-driver": "json-file",
    "log-opts": ((."log-opts" // {}) + {"max-size": "50m", "max-file": "5"})
  }' "$DOCKER_CONFIG" >"$tmp_config"
else
  install -m 0644 "$SOURCE_CONFIG" "$tmp_config"
fi

# Never install something the daemon cannot parse as a configuration object.
if ! jq -e 'type == "object"' "$tmp_config" >/dev/null 2>&1; then
  printf 'CRITICAL: generated Docker configuration is not a JSON object; %s left untouched\n' \
    "$DOCKER_CONFIG" >&2
  exit 1
fi

config_changed=0
if [[ ! -f "$DOCKER_CONFIG" ]] || ! cmp -s "$tmp_config" "$DOCKER_CONFIG"; then
  if [[ -f "$DOCKER_CONFIG" ]]; then
    backup_file "$DOCKER_CONFIG"
  fi
  install -m 0644 "$tmp_config" "$DOCKER_CONFIG"
  config_changed=1
  printf 'OK: installed Docker daemon log limits\n'
else
  printf 'OK: Docker daemon configuration is already current\n'
fi

systemctl enable --now docker
# A running daemon reads daemon.json only at start, so restart after a change.
if ((config_changed == 1)); then
  systemctl restart docker
fi

docker version
if docker compose version >/dev/null 2>&1; then
  docker compose version
else
  printf 'WARNING: Docker Compose v2 is unavailable\n'
fi
printf 'OK: Docker setup completed\n'
#######################################
# Print the command-line synopsis for 06-nvidia-tools.sh.
# Arguments: none
# Outputs:   three lines of help text on stdout
#######################################
usage() {
  printf '%s\n' \
    'Usage: sudo ./06-nvidia-tools.sh [--install-driver VERSION]' \
    '' \
    'Without --install-driver, only non-driver diagnostic tools are installed.'
}
#######################################
# Install a configuration file idempotently, backing up any previous copy.
#
# - Creates the destination directory (mode 0755) when needed.
# - When the destination already has identical content, only its mode is
#   brought in line with the requested one; no backup is taken.
# - Otherwise the existing file is copied to
#   "<destination>.<YYYYmmdd-HHMMSS>.bak" with its original mode, ownership
#   and timestamps preserved, and the new content is installed.
#
# Arguments:
#   $1 - source file shipped with the toolkit
#   $2 - absolute destination path
#   $3 - octal mode for the installed file (e.g. 0644)
# Outputs:   OK/INFO progress lines on stdout
# Returns:   0 on success; non-zero if any file operation fails
#######################################
install_config() {
  local source_path="$1"
  local destination_path="$2"
  local mode="$3"
  local backup

  install -d -m 0755 "$(dirname -- "$destination_path")"

  if [[ -f "$destination_path" ]] && cmp -s "$source_path" "$destination_path"; then
    # Content matches, but the mode may have drifted; enforce it as well.
    chmod "$mode" "$destination_path"
    printf 'OK: %s is already current\n' "$destination_path"
    return 0
  fi

  if [[ -f "$destination_path" ]]; then
    backup="${destination_path}.$(date '+%Y%m%d-%H%M%S').bak"
    # cp -p keeps the old file's own metadata instead of stamping the new mode.
    cp -p -- "$destination_path" "$backup"
    printf 'INFO: backed up %s to %s\n' "$destination_path" "$backup"
  fi

  install -m "$mode" "$source_path" "$destination_path"
  printf 'OK: installed %s\n' "$destination_path"
}
#######################################
# Print the command-line synopsis for 08-security-baseline.sh.
# Arguments: none
# Outputs:   three lines of help text on stdout
#######################################
usage() {
  printf '%s\n' \
    'Usage: sudo ./08-security-baseline.sh [--enable-ufw]' \
    '' \
    'Installs fail2ban and UFW. UFW is enabled only with the explicit flag.'
}
#######################################
# Run a best-effort report command without letting it abort the script.
# Arguments:
#   $1  - human-readable description used in the warning
#   $2+ - the command and its arguments
# Outputs:   the command's own output; a WARNING line on stdout if it fails
# Returns:   always 0
#######################################
run_optional() {
  local label="$1"
  shift

  # Left side of || is exempt from errexit, so a failing probe is tolerated.
  "$@" || printf 'WARNING: %s failed\n' "$label"
  return 0
}
+fi + +section "NVIDIA" +if command -v nvidia-smi >/dev/null 2>&1; then + run_optional "NVIDIA status" nvidia-smi +else + printf 'INFO: nvidia-smi is not installed\n' +fi + +section "Filesystems" +run_optional "filesystem report" df -hT + +section "Listening ports" +if command -v ss >/dev/null 2>&1; then + run_optional "listening port report" ss -tulpn +else + printf 'WARNING: ss is unavailable\n' +fi + +printf '\nOK: postcheck completed; review warnings above\n' +exit 0