From c596e38e9e39c3727051bd31af2048ea42c5aab4 Mon Sep 17 00:00:00 2001
From: Felipe Cardoso <felipe.cardoso@shootify.io>
Date: Wed, 25 Mar 2026 20:13:15 +0100
Subject: [PATCH] Initial commit

---
 .gitignore                        |   5 +
 .idea/.gitignore                  |  10 ++
 Makefile                          |  58 ++++++++
 bin/audit                         |  18 +++
 bin/benchmark                     |  20 +++
 bin/monitor                       |  20 +++
 bin/optimize                      |  31 +++++
 configs/grub-cmdline.conf         |  17 +++
 docs/bios-vram-guide.md           |  40 ++++++
 lib/common.sh                     |  52 +++++++
 lib/detect.sh                     | 197 ++++++++++++++++++++++++++
 lib/format.sh                     |  74 ++++++++++
 scripts/audit/quick-glance.sh     | 180 ++++++++++++++++++++++++
 scripts/audit/system-report.sh    | 194 ++++++++++++++++++++++++++
 scripts/benchmark/compare.sh      | 140 +++++++++++++++++++
 scripts/benchmark/run-baseline.sh | 223 ++++++++++++++++++++++++++++++
 scripts/benchmark/run-suite.sh    | 194 ++++++++++++++++++++++++++
 scripts/benchmark/setup.sh        | 106 ++++++++++++++
 scripts/monitor/dashboard.sh      |  90 ++++++++++++
 scripts/monitor/install-tools.sh  |  97 +++++++++++++
 scripts/monitor/log-metrics.sh    | 127 +++++++++++++++++
 scripts/optimize/kernel-params.sh | 149 ++++++++++++++++++++
 scripts/optimize/rollback.sh      |  67 +++++++++
 scripts/optimize/tuned-profile.sh |  56 ++++++++
 scripts/optimize/verify.sh        |  97 +++++++++++++
 scripts/optimize/vram-gtt.sh      |  83 +++++++++++
 26 files changed, 2345 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .idea/.gitignore
 create mode 100644 Makefile
 create mode 100755 bin/audit
 create mode 100755 bin/benchmark
 create mode 100755 bin/monitor
 create mode 100755 bin/optimize
 create mode 100644 configs/grub-cmdline.conf
 create mode 100644 docs/bios-vram-guide.md
 create mode 100644 lib/common.sh
 create mode 100644 lib/detect.sh
 create mode 100644 lib/format.sh
 create mode 100644 scripts/audit/quick-glance.sh
 create mode 100644 scripts/audit/system-report.sh
 create mode 100644 scripts/benchmark/compare.sh
 create mode 100644 scripts/benchmark/run-baseline.sh
 create mode 100644 scripts/benchmark/run-suite.sh
 create mode 100644 scripts/benchmark/setup.sh
 create mode 100644 scripts/monitor/dashboard.sh
 create mode 100644 scripts/monitor/install-tools.sh
 create mode 100644 scripts/monitor/log-metrics.sh
 create mode 100644 scripts/optimize/kernel-params.sh
 create mode 100644 scripts/optimize/rollback.sh
 create mode 100644 scripts/optimize/tuned-profile.sh
 create mode 100644 scripts/optimize/verify.sh
 create mode 100644 scripts/optimize/vram-gtt.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d596613
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+data/
+*.log
+*.csv
+*.tmp
+.claude/
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..ab1f416
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,10 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Ignored default folder with query files
+/queries/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..495c019
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,58 @@
+.PHONY: help audit audit-full monitor monitor-simple monitor-install monitor-log benchmark-setup benchmark-baseline benchmark benchmark-compare optimize optimize-kernel optimize-tuned optimize-vram verify rollback # every target here is a command, none produces a file
+
+help: ## Show available commands
+	@echo "Strix Halo Optimization Toolkit"
+	@echo ""
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-22s\033[0m %s\n", $$1, $$2}'
+
+# --- Audit ---
+audit: ## Quick system status (single screen)
+	@bash bin/audit --quick
+
+audit-full: ## Full system report (saved to data/audits/)
+	@bash bin/audit --full
+
+# --- Monitor ---
+monitor: ## Launch tmux monitoring dashboard
+	@bash bin/monitor --dashboard
+
+monitor-simple: ## Launch amdgpu_top only
+	@bash bin/monitor --simple
+
+monitor-install: ## Install monitoring tools (amdgpu_top, btop)
+	@bash scripts/monitor/install-tools.sh
+
+monitor-log: ## Start background metric logger
+	@bash bin/monitor --log
+
+# --- Benchmark ---
+benchmark-setup: ## Ensure toolboxes and test models are ready
+	@bash scripts/benchmark/setup.sh
+
+benchmark-baseline: ## Capture pre-optimization baseline
+	@bash bin/benchmark baseline
+
+benchmark: ## Run full benchmark suite
+	@bash bin/benchmark run
+
+benchmark-compare: ## Compare two benchmark runs (usage: make benchmark-compare BEFORE=dir AFTER=dir)
+	@bash bin/benchmark compare $(BEFORE) $(AFTER)
+
+# --- Optimize ---
+optimize: ## Interactive optimization walkthrough
+	@bash bin/optimize --all
+
+optimize-kernel: ## Configure kernel boot parameters
+	@bash scripts/optimize/kernel-params.sh
+
+optimize-tuned: ## Switch to accelerator-performance profile
+	@bash scripts/optimize/tuned-profile.sh
+
+optimize-vram: ## BIOS VRAM guidance + GTT verification
+	@bash scripts/optimize/vram-gtt.sh
+
+verify: ## Post-optimization verification checklist
+	@bash scripts/optimize/verify.sh
+
+rollback: ## Rollback optimizations
+	@bash scripts/optimize/rollback.sh
diff --git a/bin/audit b/bin/audit
new file mode 100755
index 0000000..a2d251a
--- /dev/null
+++ b/bin/audit
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+# System audit dispatcher
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+case "${1:---quick}" in
+    --quick|-q)  exec bash "$SCRIPT_DIR/scripts/audit/quick-glance.sh" ;;
+    --full|-f)   exec bash "$SCRIPT_DIR/scripts/audit/system-report.sh" ;;
+    --json|-j)   exec bash "$SCRIPT_DIR/scripts/audit/system-report.sh" --json ;;
+    *)
+        echo "Usage: audit [--quick|--full|--json]"
+        echo "  --quick  Single-screen system status (default)"
+        echo "  --full   Detailed report saved to data/audits/"
+        echo "  --json   JSON output to stdout"
+        exit 1
+        ;;
+esac
diff --git a/bin/benchmark b/bin/benchmark
new file mode 100755
index 0000000..ce0e400
--- /dev/null
+++ b/bin/benchmark
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Benchmark dispatcher
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+case "${1:-help}" in
+    setup)    exec bash "$SCRIPT_DIR/scripts/benchmark/setup.sh" ;;
+    baseline) exec bash "$SCRIPT_DIR/scripts/benchmark/run-baseline.sh" "${@:2}" ;;
+    run)      exec bash "$SCRIPT_DIR/scripts/benchmark/run-suite.sh" "${@:2}" ;;
+    compare)  exec bash "$SCRIPT_DIR/scripts/benchmark/compare.sh" "${@:2}" ;;
+    *)
+        echo "Usage: benchmark <command> [options]"
+        echo "  setup     Ensure toolboxes and test models are ready"
+        echo "  baseline  Capture pre-optimization baseline"
+        echo "  run       Run full benchmark suite (--tag NAME, --backends LIST)"
+        echo "  compare   Compare two runs (DIR1 DIR2)"
+        exit 1
+        ;;
+esac
diff --git a/bin/monitor b/bin/monitor
new file mode 100755
index 0000000..71119a7
--- /dev/null
+++ b/bin/monitor
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Monitoring dispatcher
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+case "${1:---dashboard}" in
+    --dashboard|-d) exec bash "$SCRIPT_DIR/scripts/monitor/dashboard.sh" ;;
+    --simple|-s)    exec bash "$SCRIPT_DIR/scripts/monitor/dashboard.sh" --simple ;;
+    --log|-l)       exec bash "$SCRIPT_DIR/scripts/monitor/log-metrics.sh" ;;
+    --install|-i)   exec bash "$SCRIPT_DIR/scripts/monitor/install-tools.sh" ;;
+    *)
+        echo "Usage: monitor [--dashboard|--simple|--log|--install]"
+        echo "  --dashboard  Tmux 3-pane: GPU + system + metrics (default)"
+        echo "  --simple     amdgpu_top only"
+        echo "  --log        Start background CSV metric logger"
+        echo "  --install    Install monitoring tools"
+        exit 1
+        ;;
+esac
diff --git a/bin/optimize b/bin/optimize
new file mode 100755
index 0000000..406400b
--- /dev/null
+++ b/bin/optimize
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Optimization dispatcher
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+case "${1:---all}" in
+    --all|-a)
+        echo "Running optimization walkthrough..."
+        bash "$SCRIPT_DIR/scripts/optimize/tuned-profile.sh"
+        bash "$SCRIPT_DIR/scripts/optimize/kernel-params.sh"
+        bash "$SCRIPT_DIR/scripts/optimize/vram-gtt.sh"
+        echo ""
+        bash "$SCRIPT_DIR/scripts/optimize/verify.sh"
+        ;;
+    --kernel|-k)  exec bash "$SCRIPT_DIR/scripts/optimize/kernel-params.sh" ;;
+    --tuned|-t)   exec bash "$SCRIPT_DIR/scripts/optimize/tuned-profile.sh" ;;
+    --vram|-v)    exec bash "$SCRIPT_DIR/scripts/optimize/vram-gtt.sh" ;;
+    --verify)     exec bash "$SCRIPT_DIR/scripts/optimize/verify.sh" ;;
+    --rollback)   exec bash "$SCRIPT_DIR/scripts/optimize/rollback.sh" ;;
+    *)
+        echo "Usage: optimize [--all|--kernel|--tuned|--vram|--verify|--rollback]"
+        echo "  --all       Full optimization walkthrough (default)"
+        echo "  --kernel    Configure kernel boot parameters"
+        echo "  --tuned     Switch tuned profile"
+        echo "  --vram      BIOS VRAM + GTT guidance"
+        echo "  --verify    Post-optimization checklist"
+        echo "  --rollback  Revert changes"
+        exit 1
+        ;;
+esac
diff --git a/configs/grub-cmdline.conf b/configs/grub-cmdline.conf
new file mode 100644
index 0000000..0997d85
--- /dev/null
+++ b/configs/grub-cmdline.conf
@@ -0,0 +1,17 @@
+# Recommended kernel boot parameters for AMD Strix Halo
+# Add to GRUB_CMDLINE_LINUX in /etc/default/grub
+#
+# After editing, regenerate GRUB:
+#   sudo grub2-mkconfig -o /boot/grub2/grub.cfg
+# Then reboot.
+#
+# For 64GB system (HP ZBook Ultra G1a):
+iommu=pt amdgpu.gttsize=60416 ttm.pages_limit=15466496
+#
+# For 128GB system (Framework Desktop, GMKtec EVO X2):
+# iommu=pt amdgpu.gttsize=126976 ttm.pages_limit=32505856
+#
+# Parameter explanation:
+#   iommu=pt           - IOMMU passthrough, reduces memory access latency
+#   amdgpu.gttsize=N   - Max GPU-addressable system RAM in MiB (total - 4GB reserve)
+#   ttm.pages_limit=N  - Max pinnable 4K pages (gttsize_MiB * 256)
diff --git a/docs/bios-vram-guide.md b/docs/bios-vram-guide.md
new file mode 100644
index 0000000..e8fa32b
--- /dev/null
+++ b/docs/bios-vram-guide.md
@@ -0,0 +1,40 @@
+# BIOS VRAM Configuration — HP ZBook Ultra G1a
+
+## Why Change VRAM?
+
+AMD Strix Halo uses **unified memory** — the CPU and GPU share the same physical RAM. By default, the HP ZBook allocates **32 GB as dedicated VRAM**, permanently locking that memory away from the OS even when the GPU isn't using it.
+
+AMD recommends keeping dedicated VRAM at **512 MB** (minimum) and using **GTT (Graphics Translation Table)** for dynamic GPU memory access. With kernel boot parameters, the GPU can access up to ~59 GB on demand while the CPU retains full flexibility.
+
+## Current vs Optimal (64 GB system)
+
+| Setting | Default | Optimal |
+|---------|---------|---------|
+| Dedicated VRAM | 32 GB | 0.5 GB |
+| GTT (dynamic) | ~15.5 GB | ~59 GB |
+| OS visible RAM | ~31 GB | ~63.5 GB |
+
+## Steps
+
+1. **Reboot** the laptop
+2. Press **F10** repeatedly during boot to enter BIOS Setup
+3. Navigate to: **Advanced** > **Built-in Device Options** (or **Display** section)
+4. Find: **UMA Frame Buffer Size** (may also be labeled "iGPU Memory" or "VRAM Size")
+5. Set to: **512 MB** (or the smallest available option)
+6. **Save and Exit** (F10)
+
+> The exact menu path may vary by BIOS version. If you can't find it under "Built-in Device Options", check under "Advanced > Display" or "Chipset Configuration".
+
+## After BIOS Change
+
+1. Ensure kernel boot parameters are configured (run `make optimize-kernel`)
+2. Reboot
+3. Verify with `make audit`:
+   - VRAM should show ~0.5 GiB
+   - GTT should show ~59 GiB
+   - System RAM should show ~63.5 GiB
+
+## References
+
+- [AMD ROCm Strix Halo Guide](https://rocm.docs.amd.com/en/latest/how-to/system-optimization/strixhalo.html)
+- [Strix Halo Toolboxes](https://strix-halo-toolboxes.com/)
diff --git a/lib/common.sh b/lib/common.sh
new file mode 100644
index 0000000..1a01941
--- /dev/null
+++ b/lib/common.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Common utilities for strix-halo-optimizations scripts
+
+set -euo pipefail
+
+# Auto-detect project root: walk up from the sourcing script's directory until a Makefile is found
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[1]:-${BASH_SOURCE[0]}}")" && pwd)"  # BASH_SOURCE[1] = script that sourced this file; overwrites any SCRIPT_DIR the caller set
+PROJECT_ROOT="$(cd "$SCRIPT_DIR" && while [[ ! -f Makefile ]] && [[ "$PWD" != "/" ]]; do cd ..; done; pwd)"  # yields "/" when no Makefile exists on the way up
+if [[ "$PROJECT_ROOT" == "/" ]]; then
+    # Fallback: assume lib/ is one level below project root
+    PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"  # BASH_SOURCE[0] is this file itself
+fi
+
+# Colors (disabled if not a terminal)
+if [[ -t 1 ]]; then
+    RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[0;33m'
+    BLUE='\033[0;34m'; CYAN='\033[0;36m'; BOLD='\033[1m'
+    DIM='\033[2m'; RESET='\033[0m'
+else
+    RED=''; GREEN=''; YELLOW=''; BLUE=''; CYAN=''; BOLD=''; DIM=''; RESET=''
+fi
+
+log_info()    { printf "${BLUE}[INFO]${RESET} %s\n" "$*"; }
+log_success() { printf "${GREEN}[OK]${RESET}   %s\n" "$*"; }
+log_warn()    { printf "${YELLOW}[WARN]${RESET} %s\n" "$*"; }
+log_error()   { printf "${RED}[ERR]${RESET}  %s\n" "$*" >&2; }
+log_header()  { printf "\n${BOLD}=== %s ===${RESET}\n" "$*"; }
+
+is_cmd() { command -v "$1" &>/dev/null; }
+
+require_root() {
+    if [[ $EUID -ne 0 ]]; then
+        log_error "This script requires root privileges. Run with sudo."
+        exit 1
+    fi
+}
+
+confirm() {
+    local prompt="${1:-Continue?}"
+    printf "${YELLOW}%s [y/N] ${RESET}" "$prompt"
+    read -r reply
+    [[ "$reply" =~ ^[Yy]$ ]]
+}
+
+data_dir() {
+    local subdir="${1:-.}"
+    local dir="$PROJECT_ROOT/data/$subdir"
+    mkdir -p "$dir"
+    echo "$dir"
+}
+
+timestamp() { date '+%Y%m%d-%H%M%S'; }
diff --git a/lib/detect.sh b/lib/detect.sh
new file mode 100644
index 0000000..4087952
--- /dev/null
+++ b/lib/detect.sh
@@ -0,0 +1,197 @@
+#!/usr/bin/env bash
+# Hardware and configuration detection for Strix Halo
+
+# Find the amdgpu DRM card path
+find_gpu_card() {
+    local node
+    for node in /sys/class/drm/card*/device/vendor; do
+        [[ -f "$node" ]] || continue                  # an unmatched glob stays literal and is skipped
+        if [[ "$(cat "$node")" == "0x1002" ]]; then   # 0x1002 is the AMD PCI vendor ID
+            dirname "$node"
+            return 0
+        fi
+    done
+    # Fallback: try any card with mem_info_vram_total (i.e., an amdgpu device)
+    for node in /sys/class/drm/card*/device/mem_info_vram_total; do
+        [[ -f "$node" ]] || continue
+        dirname "$node"
+        return 0
+    done
+    echo "/sys/class/drm/card1/device"  # last resort
+}
+
+GPU_SYSFS="$(find_gpu_card)"
+
+# --- CPU ---
+detect_cpu_model()   { grep -m1 'model name' /proc/cpuinfo | cut -d: -f2 | xargs; }
+detect_cpu_cores()   { grep -c '^processor' /proc/cpuinfo; }
+detect_cpu_physical() { grep 'cpu cores' /proc/cpuinfo | head -1 | cut -d: -f2 | xargs; }
+
+# --- GPU ---
+detect_gpu_name() {  # Prints the first AMD Display/VGA controller description from lspci, or "unknown"; never fails
+    lspci 2>/dev/null | awk 'tolower($0) ~ /display|vga/ && tolower($0) ~ /amd/ && !hit++ { sub(/.*: /, ""); print } END { if (!hit) print "unknown" }' || true  # "|| true": a missing lspci must not abort errexit+pipefail callers
+}
+
+detect_gpu_device_id() {  # Prints the content of $GPU_SYSFS/device without its leading "0x", or "unknown" if unreadable
+    sed 's/^0x//' "$GPU_SYSFS/device" 2>/dev/null || echo "unknown"  # fallback mirrors the other sysfs readers; no pipeline, so pipefail cannot trip
+}
+
+# --- Memory (bytes) ---
+detect_vram_total() { cat "$GPU_SYSFS/mem_info_vram_total" 2>/dev/null || echo 0; }
+detect_vram_used()  { cat "$GPU_SYSFS/mem_info_vram_used" 2>/dev/null || echo 0; }
+detect_gtt_total()  { cat "$GPU_SYSFS/mem_info_gtt_total" 2>/dev/null || echo 0; }
+detect_gtt_used()   { cat "$GPU_SYSFS/mem_info_gtt_used" 2>/dev/null || echo 0; }
+
+detect_system_ram_kb() {
+    local kb
+    kb="$(grep MemTotal /proc/meminfo 2>/dev/null | awk '{print $2}')"
+    echo "${kb:-0}"
+}
+detect_system_ram_bytes() { echo $(( $(detect_system_ram_kb) * 1024 )); }
+
+# --- Kernel ---
+detect_kernel_version() { uname -r; }
+
+detect_kernel_param() {
+    # Prints the param's value, "present" for a bare flag without =value, or nothing if absent
+    local param="$1"
+    local cmdline
+    cmdline="$(cat /proc/cmdline)"
+    # Escape dots for regex and anchor with word boundary (space or start-of-string)
+    local pattern="${param//./\\.}"
+    if [[ "$cmdline" =~ (^|[[:space:]])${pattern}=([^ ]+) ]]; then  # leftmost "param=value" occurrence wins
+        echo "${BASH_REMATCH[2]}"  # group 2 is the text after "=" up to the next space
+    elif [[ "$cmdline" =~ (^|[[:space:]])${pattern}([[:space:]]|$) ]]; then  # flag given without a value
+        echo "present"
+    fi
+}
+
+detect_has_iommu_pt() {
+    local val
+    val="$(detect_kernel_param 'iommu')"
+    [[ "$val" == "pt" ]]
+}
+
+detect_gttsize_param() { detect_kernel_param 'amdgpu.gttsize'; }
+detect_pages_limit_param() { detect_kernel_param 'ttm.pages_limit'; }
+
+# --- Tuned ---
+detect_tuned_profile() {  # Prints the active tuned profile (prefix stripped), or "tuned not installed"; never fails
+    if is_cmd tuned-adm; then
+        tuned-adm active 2>/dev/null | sed 's/Current active profile: //' || true  # a failing tuned-adm must not abort errexit+pipefail callers
+    else
+        echo "tuned not installed"
+    fi
+}
+
+# --- Firmware ---
+detect_firmware_version() {  # Prints the linux-firmware version-release (e.g. "20251125-1"), or exactly "unknown" if rpm or the package is missing
+    rpm -q linux-firmware 2>/dev/null | sed -n -e 's/\.fc.*//' -e 's/^linux-firmware-//p' | grep . || echo "unknown"  # sed -n drops rpm's "not installed" line; grep . makes empty output trigger the fallback with or without pipefail
+}
+
+detect_firmware_bad() {
+    # Returns 0 (true) if firmware is the known-bad version
+    local fw
+    fw="$(detect_firmware_version)"
+    [[ "$fw" == *"20251125"* ]]
+}
+
+# --- ROCm ---
+detect_rocm_version() {
+    if [[ -f /opt/rocm/.info/version ]]; then
+        cat /opt/rocm/.info/version
+    else
+        rpm -qa 2>/dev/null | grep '^rocm-core-' | head -1 | sed 's/rocm-core-//' | sed 's/-.*//' || echo "not installed"
+    fi
+}
+
+detect_rocm_packages() {
+    rpm -qa 2>/dev/null | grep -i rocm | sort
+}
+
+# --- Vulkan ---
+detect_vulkan_driver() {  # Prints the last field of the first driverName line from `vulkaninfo --summary`; empty if none
+    if is_cmd vulkaninfo; then
+        vulkaninfo --summary 2>/dev/null | awk '/driverName/ && !seen++ { print $NF }' || true  # one awk replaces grep|head (no SIGPIPE); "|| true" keeps errexit+pipefail callers alive
+    else
+        echo "vulkaninfo not available"
+    fi
+}
+
+detect_vulkan_version() {  # Prints the last field of the first apiVersion line from `vulkaninfo --summary`; empty if unavailable
+    if is_cmd vulkaninfo; then
+        vulkaninfo --summary 2>/dev/null | awk '/apiVersion/ && !seen++ { print $NF }' || true  # one awk replaces grep|head (no SIGPIPE); "|| true" keeps errexit+pipefail callers alive
+    fi
+}
+
+# --- Toolbox containers ---
+detect_toolboxes() {  # Prints the container rows of `toolbox list` without the header line; empty if toolbox is absent or fails
+    if is_cmd toolbox; then
+        toolbox list --containers 2>/dev/null | tail -n +2 || true  # a failed listing must not abort errexit+pipefail callers
+    fi
+}
+
+detect_toolbox_names() {
+    detect_toolboxes | awk '{print $2}' 2>/dev/null
+}
+
+# --- LLM stacks ---
+detect_stack_ollama()   { is_cmd ollama && echo "installed" || echo "missing"; }
+detect_stack_lmstudio() { is_cmd lms && echo "installed" || echo "missing"; }
+detect_stack_llamacpp() { (is_cmd llama-cli || is_cmd llama-bench) && echo "installed" || echo "missing"; }
+detect_stack_opencode() { is_cmd opencode && echo "installed" || echo "missing"; }
+
+# --- Sensors ---
+detect_gpu_temp() {
+    # Returns temperature in millidegrees C
+    local hwmon
+    for hwmon in "$GPU_SYSFS"/hwmon/hwmon*/temp1_input; do
+        if [[ -f "$hwmon" ]]; then
+            cat "$hwmon"
+            return
+        fi
+    done
+    echo 0
+}
+
+detect_gpu_power() {
+    # Returns power in microwatts
+    local hwmon
+    for hwmon in "$GPU_SYSFS"/hwmon/hwmon*/power1_average; do
+        if [[ -f "$hwmon" ]]; then
+            cat "$hwmon"
+            return
+        fi
+    done
+    echo 0
+}
+
+detect_gpu_busy() {
+    cat "$GPU_SYSFS/gpu_busy_percent" 2>/dev/null || echo 0
+}
+
+# --- Total physical memory (visible + VRAM dedicated) ---
+detect_total_physical_ram_kb() {
+    local visible_kb vram_bytes vram_kb
+    visible_kb="$(detect_system_ram_kb)"
+    vram_bytes="$(detect_vram_total)"
+    vram_kb=$(( vram_bytes / 1024 ))
+    echo $(( visible_kb + vram_kb ))
+}
+
+# --- Recommended values for this system ---
+recommended_gttsize_mib() {
+    # Total physical RAM (including VRAM allocation) minus 4 GiB reserve, in MiB
+    local total_kb
+    total_kb="$(detect_total_physical_ram_kb)"  # visible MemTotal plus dedicated VRAM, in KiB
+    local total_gib=$(( total_kb / 1024 / 1024 ))  # integer division: rounds down to whole GiB
+    local gtt_gib=$(( total_gib - 4 ))  # NOTE(review): goes negative when fewer than 4 GiB are detected
+    echo $(( gtt_gib * 1024 ))  # GiB -> MiB
+}
+
+recommended_pages_limit() {
+    # GTT GiB * 1024 MiB/GiB * 256 pages/MiB
+    local gtt_mib
+    gtt_mib="$(recommended_gttsize_mib)"
+    echo $(( gtt_mib * 256 ))
+}
diff --git a/lib/format.sh b/lib/format.sh
new file mode 100644
index 0000000..ea4e94b
--- /dev/null
+++ b/lib/format.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# Formatting utilities
+# Requires: lib/common.sh must be sourced first (provides color variables)
+
+# Guard: ensure color variables are defined (sourced from common.sh)
+: "${GREEN:=}" "${RED:=}" "${YELLOW:=}" "${CYAN:=}" "${BOLD:=}" "${DIM:=}" "${RESET:=}"
+
+human_bytes() {
+    local bytes="${1:-0}"
+    if (( bytes >= 1073741824 )); then
+        local val
+        val="$(echo "scale=1; $bytes / 1073741824" | bc)"
+        printf "%s GiB" "${val/#./0.}"
+    elif (( bytes >= 1048576 )); then
+        printf "%d MiB" "$(( bytes / 1048576 ))"
+    elif (( bytes >= 1024 )); then
+        printf "%d KiB" "$(( bytes / 1024 ))"
+    else
+        printf "%d B" "$bytes"
+    fi
+}
+
+human_mib() {
+    local mib="${1:-0}"
+    if (( mib >= 1024 )); then
+        local val
+        val="$(echo "scale=1; $mib / 1024" | bc)"
+        printf "%s GiB" "${val/#./0.}"
+    else
+        printf "%d MiB" "$mib"
+    fi
+}
+
+# Status indicators
+STATUS_PASS="${GREEN}[OK]${RESET}"
+STATUS_FAIL="${RED}[!!]${RESET}"
+STATUS_WARN="${YELLOW}[??]${RESET}"
+STATUS_INFO="${CYAN}[--]${RESET}"
+
+print_status() {
+    # Print one aligned status row: print_status pass|fail|warn|info "label" ["detail"]
+    local level="$1" name="$2" note="${3:-}" badge="$STATUS_INFO"  # any other level renders as info
+    if [[ "$level" == "pass" ]]; then
+        badge="$STATUS_PASS"
+    elif [[ "$level" == "fail" ]]; then
+        badge="$STATUS_FAIL"
+    elif [[ "$level" == "warn" ]]; then
+        badge="$STATUS_WARN"
+    fi
+    printf "  %b %-30s %s\n" "$badge" "$name" "$note"
+}
+
+print_kv() {
+    local key="$1" value="$2"
+    printf "  %b%-24s%b %s\n" "$DIM" "$key:" "$RESET" "$value"
+}
+
+print_divider() {
+    printf "%b%s%b\n" "$DIM" "$(printf '%.0s─' {1..60})" "$RESET"
+}
+
+# Table helpers — format strings are caller-controlled constants, not user input
+print_table_header() {
+    local fmt="$1"; shift
+    # shellcheck disable=SC2059 — format string is a trusted constant from callers
+    printf "${BOLD}${fmt}${RESET}\n" "$@"
+    print_divider
+}
+
+print_table_row() {
+    local fmt="$1"; shift
+    # shellcheck disable=SC2059 — format string is a trusted constant from callers
+    printf "${fmt}\n" "$@"
+}
diff --git a/scripts/audit/quick-glance.sh b/scripts/audit/quick-glance.sh
new file mode 100644
index 0000000..4621d58
--- /dev/null
+++ b/scripts/audit/quick-glance.sh
@@ -0,0 +1,180 @@
+#!/usr/bin/env bash
+# Quick-glance system audit — single screen status overview
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+# ── Gather data ──────────────────────────────────────────
+cpu_model="$(detect_cpu_model)"
+cpu_threads="$(detect_cpu_cores)"
+cpu_physical="$(detect_cpu_physical)"
+gpu_name="$(detect_gpu_name)"
+kernel="$(detect_kernel_version)"
+firmware="$(detect_firmware_version)"
+
+vram_total="$(detect_vram_total)"
+vram_used="$(detect_vram_used)"
+gtt_total="$(detect_gtt_total)"
+gtt_used="$(detect_gtt_used)"
+ram_kb="$(detect_system_ram_kb)"
+ram_bytes=$(( ram_kb * 1024 ))
+
+param_iommu="$(detect_kernel_param 'iommu')"
+param_gttsize="$(detect_gttsize_param)"
+param_pages="$(detect_pages_limit_param)"
+
+tuned="$(detect_tuned_profile)"
+
+rocm_ver="$(detect_rocm_version)"
+vulkan_drv="$(detect_vulkan_driver)"
+vulkan_ver="$(detect_vulkan_version)"
+
+rec_gttsize="$(recommended_gttsize_mib)"
+rec_pages="$(recommended_pages_limit)"
+
+# ── Score tracking ───────────────────────────────────────
+score=0
+total=0
+
+check() {
+    local pass="$1" label="$2" detail="$3"
+    total=$(( total + 1 ))
+    if [[ "$pass" == "1" ]]; then
+        score=$(( score + 1 ))
+        print_status pass "$label" "$detail"
+    else
+        print_status fail "$label" "$detail"
+    fi
+}
+
+check_warn() {
+    local label="$1" detail="$2"
+    print_status warn "$label" "$detail"
+}
+
+check_info() {
+    local label="$1" detail="$2"
+    print_status info "$label" "$detail"
+}
+
+# ── Output ───────────────────────────────────────────────
+printf "\n${BOLD}${CYAN}"
+cat << 'BANNER'
+  ╔═══════════════════════════════════════════╗
+  ║   AMD Strix Halo — System Status          ║
+  ╚═══════════════════════════════════════════╝
+BANNER
+printf "${RESET}"
+
+# Hardware
+log_header "Hardware"
+print_kv "CPU" "$cpu_model (${cpu_physical}C/${cpu_threads}T)"
+print_kv "GPU" "$gpu_name"
+print_kv "System RAM (visible)" "$(human_bytes "$ram_bytes")"
+
+# Kernel & Firmware
+log_header "Kernel & Firmware"
+kernel_min="6.18.4"            # minimum supported release, patch level included
+kernel_base="${kernel%%-*}"    # drop the distro suffix: 6.18.4-200.fc43.x86_64 -> 6.18.4
+kernel_ok=0
+if printf '%s\n' "$kernel_min" "$kernel_base" | sort -C -V; then  # already in version order means kernel_base >= kernel_min
+    kernel_ok=1
+fi
+check "$kernel_ok" "Kernel version" "$kernel (need >= $kernel_min)"
+
+firmware_ok=1
+firmware_note="$firmware"
+if detect_firmware_bad; then
+    firmware_ok=0
+    firmware_note="$firmware (KNOWN BAD — causes ROCm crashes!)"
+fi
+check "$firmware_ok" "Firmware" "$firmware_note"
+
+# Memory allocation
+log_header "Memory Allocation"
+vram_gib=$(echo "scale=1; $vram_total / 1073741824" | bc); vram_gib="${vram_gib/#./0.}"  # bc prints ".5" for 512 MiB; show "0.5" like human_bytes does
+gtt_gib=$(echo "scale=1; $gtt_total / 1073741824" | bc); gtt_gib="${gtt_gib/#./0.}"      # same leading-zero fix for sub-GiB GTT sizes
+
+# VRAM: should be <= 1 GiB (ideally 0.5 GiB)
+vram_ok=0
+(( vram_total <= 1073741824 )) && vram_ok=1
+check "$vram_ok" "VRAM (dedicated)" "${vram_gib} GiB$([ "$vram_ok" -eq 0 ] && echo " — should be 0.5 GiB in BIOS")"
+
+# GTT: should be close to recommended (at least 75%)
+gtt_rec_bytes=$(( rec_gttsize * 1048576 ))
+gtt_ok=0
+(( gtt_total >= gtt_rec_bytes * 3 / 4 )) && gtt_ok=1
+check "$gtt_ok" "GTT (dynamic)" "${gtt_gib} GiB$([ "$gtt_ok" -eq 0 ] && echo " — should be ~$(human_mib "$rec_gttsize") with kernel params")"
+
+print_kv "VRAM in use" "$(human_bytes "$vram_used")"
+print_kv "GTT in use" "$(human_bytes "$gtt_used")"
+
+# Kernel boot parameters
+log_header "Kernel Boot Parameters"
+iommu_ok=0
+[[ "$param_iommu" == "pt" ]] && iommu_ok=1
+check "$iommu_ok" "iommu=pt" "$([ -n "$param_iommu" ] && echo "current: $param_iommu" || echo "MISSING")"
+
+gtt_param_ok=0
+[[ -n "$param_gttsize" ]] && gtt_param_ok=1
+check "$gtt_param_ok" "amdgpu.gttsize" "$([ -n "$param_gttsize" ] && echo "current: ${param_gttsize} MiB" || echo "MISSING — recommended: ${rec_gttsize}")"
+
+pages_ok=0
+[[ -n "$param_pages" ]] && pages_ok=1
+check "$pages_ok" "ttm.pages_limit" "$([ -n "$param_pages" ] && echo "current: $param_pages" || echo "MISSING — recommended: ${rec_pages}")"
+
+# Tuned profile
+log_header "Performance Profile"
+tuned_ok=0
+[[ "$tuned" == "accelerator-performance" ]] && tuned_ok=1
+check "$tuned_ok" "Tuned profile" "$tuned$([ "$tuned_ok" -eq 0 ] && echo " — recommended: accelerator-performance")"
+
+# Software stack
+log_header "Software Stack"
+check_info "ROCm" "$rocm_ver"
+check_info "Vulkan" "$vulkan_drv $vulkan_ver"
+
+# Toolboxes
+toolbox_count=0
+if is_cmd toolbox; then
+    toolbox_count=$(detect_toolbox_names | wc -l)
+fi
+if (( toolbox_count > 0 )); then
+    check_info "Toolbox containers" "$toolbox_count available"
+    detect_toolbox_names | while read -r name; do
+        printf "       ${DIM}%s${RESET}\n" "$name"
+    done
+else
+    check_warn "Toolbox containers" "none — run 'make benchmark-setup'"
+fi
+
+# LLM stacks
+log_header "LLM Stacks"
+check_info "LM Studio" "$(detect_stack_lmstudio)"
+check_info "opencode" "$(detect_stack_opencode)"
+check_info "ollama" "$(detect_stack_ollama)"
+check_info "llama.cpp (native)" "$(detect_stack_llamacpp)"
+
+# Sensors
+log_header "Current Sensors"
+gpu_temp="$(detect_gpu_temp)"
+gpu_power="$(detect_gpu_power)"
+gpu_busy="$(detect_gpu_busy)"
+print_kv "GPU Temperature" "$(echo "scale=1; $gpu_temp / 1000" | bc) C"
+print_kv "GPU Power" "$(echo "scale=1; $gpu_power / 1000000" | bc) W"
+print_kv "GPU Utilization" "${gpu_busy}%"
+
+# Overall score
+log_header "Optimization Score"
+printf "\n  ${BOLD}%d / %d${RESET} checks passing\n" "$score" "$total"
+if (( score == total )); then
+    printf "  ${GREEN}System is fully optimized!${RESET}\n"
+elif (( score >= total / 2 )); then
+    printf "  ${YELLOW}Partially optimized — run 'make optimize' for improvements${RESET}\n"
+else
+    printf "  ${RED}Significant optimizations available — run 'make optimize'${RESET}\n"
+fi
+echo ""
diff --git a/scripts/audit/system-report.sh b/scripts/audit/system-report.sh
new file mode 100644
index 0000000..bf4cdb5
--- /dev/null
+++ b/scripts/audit/system-report.sh
@@ -0,0 +1,194 @@
+#!/usr/bin/env bash
+# Full system report — detailed audit with JSON + text output
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+JSON_ONLY=false
+[[ "${1:-}" == "--json" ]] && JSON_ONLY=true
+
+# ── Gather all data ──────────────────────────────────────
+ts="$(timestamp)"
+cpu_model="$(detect_cpu_model)"
+cpu_threads="$(detect_cpu_cores)"
+cpu_physical="$(detect_cpu_physical)"
+gpu_name="$(detect_gpu_name)"
+gpu_device_id="$(detect_gpu_device_id)"
+kernel="$(detect_kernel_version)"
+firmware="$(detect_firmware_version)"
+
+vram_total="$(detect_vram_total)"
+vram_used="$(detect_vram_used)"
+gtt_total="$(detect_gtt_total)"
+gtt_used="$(detect_gtt_used)"
+ram_kb="$(detect_system_ram_kb)"
+
+param_iommu="$(detect_kernel_param 'iommu')"
+param_gttsize="$(detect_gttsize_param)"
+param_pages="$(detect_pages_limit_param)"
+cmdline="$(cat /proc/cmdline)"
+
+tuned="$(detect_tuned_profile)"
+rocm_ver="$(detect_rocm_version)"
+vulkan_drv="$(detect_vulkan_driver)"
+vulkan_ver="$(detect_vulkan_version)"
+
+gpu_temp="$(detect_gpu_temp)"
+gpu_power="$(detect_gpu_power)"
+gpu_busy="$(detect_gpu_busy)"
+
+rec_gttsize="$(recommended_gttsize_mib)"
+rec_pages="$(recommended_pages_limit)"
+
+# Toolbox list
+toolboxes_json="[]"
+if is_cmd toolbox; then
+    toolboxes_json="$(detect_toolbox_names | jq -R . | jq -s . 2>/dev/null || echo '[]')"
+fi
+
+# LLM stacks
+stack_ollama="$(detect_stack_ollama)"
+stack_lmstudio="$(detect_stack_lmstudio)"
+stack_llamacpp="$(detect_stack_llamacpp)"
+stack_opencode="$(detect_stack_opencode)"
+
+# ROCm packages
+rocm_pkgs="$(detect_rocm_packages | head -30)"
+
+# ── Build JSON (all data via env vars — no shell interpolation into Python) ──
+json_report="$(
+    SR_TS="$ts" \
+    SR_CPU_MODEL="$cpu_model" SR_CPU_CORES="$cpu_physical" SR_CPU_THREADS="$cpu_threads" \
+    SR_GPU_NAME="$gpu_name" SR_GPU_DEVICE_ID="$gpu_device_id" SR_RAM_KB="$ram_kb" \
+    SR_VRAM_TOTAL="$vram_total" SR_VRAM_USED="$vram_used" \
+    SR_GTT_TOTAL="$gtt_total" SR_GTT_USED="$gtt_used" \
+    SR_REC_GTTSIZE="$rec_gttsize" SR_REC_PAGES="$rec_pages" \
+    SR_KERNEL="$kernel" SR_CMDLINE="$cmdline" \
+    SR_PARAM_IOMMU="$param_iommu" SR_PARAM_GTTSIZE="$param_gttsize" SR_PARAM_PAGES="$param_pages" \
+    SR_FIRMWARE="$firmware" SR_TUNED="$tuned" SR_ROCM="$rocm_ver" \
+    SR_VULKAN_DRV="$vulkan_drv" SR_VULKAN_VER="${vulkan_ver:-}" \
+    SR_GPU_TEMP="$gpu_temp" SR_GPU_POWER="$gpu_power" SR_GPU_BUSY="$gpu_busy" \
+    SR_TOOLBOXES="$toolboxes_json" \
+    SR_STACK_OLLAMA="$stack_ollama" SR_STACK_LMSTUDIO="$stack_lmstudio" \
+    SR_STACK_LLAMACPP="$stack_llamacpp" SR_STACK_OPENCODE="$stack_opencode" \
+    python3 -c '
+import json, os
+e = os.environ
+data = {
+    "timestamp": e["SR_TS"],
+    "hardware": {
+        "cpu_model": e["SR_CPU_MODEL"],
+        "cpu_cores": int(e["SR_CPU_CORES"]),
+        "cpu_threads": int(e["SR_CPU_THREADS"]),
+        "gpu_name": e["SR_GPU_NAME"],
+        "gpu_device_id": e["SR_GPU_DEVICE_ID"],
+        "system_ram_kb": int(e["SR_RAM_KB"]),
+    },
+    "memory": {
+        "vram_total_bytes": int(e["SR_VRAM_TOTAL"]),
+        "vram_used_bytes": int(e["SR_VRAM_USED"]),
+        "gtt_total_bytes": int(e["SR_GTT_TOTAL"]),
+        "gtt_used_bytes": int(e["SR_GTT_USED"]),
+        "recommended_gttsize_mib": int(e["SR_REC_GTTSIZE"]),
+        "recommended_pages_limit": int(e["SR_REC_PAGES"]),
+    },
+    "kernel": {
+        "version": e["SR_KERNEL"],
+        "cmdline": e["SR_CMDLINE"],
+        "param_iommu": e["SR_PARAM_IOMMU"],
+        "param_gttsize": e["SR_PARAM_GTTSIZE"],
+        "param_pages_limit": e["SR_PARAM_PAGES"],
+    },
+    "firmware": e["SR_FIRMWARE"],
+    "tuned_profile": e["SR_TUNED"],
+    "rocm_version": e["SR_ROCM"],
+    "vulkan": {
+        "driver": e["SR_VULKAN_DRV"],
+        "version": e["SR_VULKAN_VER"],
+    },
+    "sensors": {
+        "gpu_temp_mc": int(e["SR_GPU_TEMP"]),
+        "gpu_power_uw": int(e["SR_GPU_POWER"]),
+        "gpu_busy_pct": int(e["SR_GPU_BUSY"]),
+    },
+    "toolboxes": json.loads(e["SR_TOOLBOXES"]),
+    "stacks": {
+        "ollama": e["SR_STACK_OLLAMA"],
+        "lmstudio": e["SR_STACK_LMSTUDIO"],
+        "llamacpp": e["SR_STACK_LLAMACPP"],
+        "opencode": e["SR_STACK_OPENCODE"],
+    },
+}
+print(json.dumps(data, indent=2))
+'
+)"
+
+if $JSON_ONLY; then
+    echo "$json_report" | python3 -m json.tool 2>/dev/null || echo "$json_report"
+    exit 0
+fi
+
+# ── Save report ──────────────────────────────────────────
+audit_dir="$(data_dir audits)"
+json_file="$audit_dir/report-${ts}.json"
+text_file="$audit_dir/report-${ts}.txt"
+
+echo "$json_report" | python3 -m json.tool > "$json_file" 2>/dev/null || echo "$json_report" > "$json_file"
+
+# ── Text output (also saved) ────────────────────────────
+{
+    printf "Strix Halo Full System Report — %s\n" "$ts"
+    printf "=%.0s" {1..60}; echo
+
+    printf "\nHardware:\n"
+    printf "  CPU:     %s (%sC/%sT)\n" "$cpu_model" "$cpu_physical" "$cpu_threads"
+    printf "  GPU:     %s (device: 0x%s)\n" "$gpu_name" "$gpu_device_id"
+    printf "  RAM:     %s KB\n" "$ram_kb"
+
+    printf "\nMemory Allocation:\n"
+    printf "  VRAM total:   %s (used: %s)\n" "$(human_bytes "$vram_total")" "$(human_bytes "$vram_used")"
+    printf "  GTT total:    %s (used: %s)\n" "$(human_bytes "$gtt_total")" "$(human_bytes "$gtt_used")"
+    printf "  Recommended:  gttsize=%s MiB, pages_limit=%s\n" "$rec_gttsize" "$rec_pages"
+
+    printf "\nKernel:\n"
+    printf "  Version:    %s\n" "$kernel"
+    printf "  Firmware:   %s\n" "$firmware"
+    printf "  Cmdline:    %s\n" "$cmdline"
+    printf "  iommu:      %s\n" "${param_iommu:-not set}"
+    printf "  gttsize:    %s\n" "${param_gttsize:-not set}"
+    printf "  pages_limit:%s\n" "${param_pages:-not set}"
+
+    printf "\nPerformance:\n"
+    printf "  Tuned:      %s\n" "$tuned"
+    printf "  GPU temp:   %s C\n" "$(echo "scale=1; $gpu_temp / 1000" | bc)"
+    printf "  GPU power:  %s W\n" "$(echo "scale=1; $gpu_power / 1000000" | bc)"
+    printf "  GPU busy:   %s%%\n" "$gpu_busy"
+
+    printf "\nSoftware:\n"
+    printf "  ROCm:       %s\n" "$rocm_ver"
+    printf "  Vulkan:     %s %s\n" "$vulkan_drv" "$vulkan_ver"
+
+    printf "\nROCm Packages:\n"
+    echo "$rocm_pkgs" | sed 's/^/  /'
+
+    printf "\nToolboxes:\n"
+    if [[ "$toolboxes_json" == "[]" ]]; then
+        printf "  none\n"
+    else
+        echo "$toolboxes_json" | python3 -c "import sys,json; [print(f'  {x}') for x in json.load(sys.stdin)]" 2>/dev/null || printf "  (parse error)\n"
+    fi
+
+    printf "\nLLM Stacks:\n"
+    printf "  ollama:     %s\n" "$stack_ollama"
+    printf "  LM Studio:  %s\n" "$stack_lmstudio"
+    printf "  llama.cpp:  %s\n" "$stack_llamacpp"
+    printf "  opencode:   %s\n" "$stack_opencode"
+} | tee "$text_file"
+
+echo ""
+log_success "Report saved to:"
+log_info "  JSON: $json_file"
+log_info "  Text: $text_file"
diff --git a/scripts/benchmark/compare.sh b/scripts/benchmark/compare.sh
new file mode 100644
index 0000000..bf4a9a5
--- /dev/null
+++ b/scripts/benchmark/compare.sh
@@ -0,0 +1,140 @@
+#!/usr/bin/env bash
+# Compare two benchmark runs side-by-side
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+if [[ $# -lt 2 ]]; then
+    echo "Usage: benchmark compare <before-dir> <after-dir>"
+    echo ""
+    echo "Examples:"
+    echo "  bin/benchmark compare data/baselines/20260325-120000 data/benchmarks/post-opt-20260326-100000"
+    echo ""
+    echo "Available baselines:"
+    ls -d "$(data_dir baselines)"/*/ 2>/dev/null | sed 's|/$||' || echo "  (none)"
+    echo ""
+    echo "Available benchmark runs:"
+    ls -d "$(data_dir benchmarks)"/*/ 2>/dev/null | sed 's|/$||' || echo "  (none)"
+    exit 1
+fi
+
+BEFORE_DIR="$1"
+AFTER_DIR="$2"
+
+for d in "$BEFORE_DIR" "$AFTER_DIR"; do
+    if [[ ! -f "$d/summary.json" ]]; then
+        log_error "No summary.json in $d"
+        exit 1
+    fi
+done
+
+log_header "Benchmark Comparison"
+
+# Extract timestamps from directory names
+before_name="$(basename "$BEFORE_DIR")"
+after_name="$(basename "$AFTER_DIR")"
+log_info "Before: $before_name"
+log_info "After:  $after_name"
+
+# Show system state diff if available
+if [[ -f "$BEFORE_DIR/system-state.json" ]] && [[ -f "$AFTER_DIR/system-state.json" ]]; then
+    echo ""
+    python3 - "$BEFORE_DIR/system-state.json" "$AFTER_DIR/system-state.json" << 'PYEOF'
+import sys, json
+
+with open(sys.argv[1]) as f:
+    before = json.load(f)
+with open(sys.argv[2]) as f:
+    after = json.load(f)
+
+changes = []
+# Check key config differences
+b_mem = before.get("memory", {})
+a_mem = after.get("memory", {})
+if b_mem.get("vram_total_bytes") != a_mem.get("vram_total_bytes"):
+    bv = b_mem.get("vram_total_bytes", 0) / 2**30
+    av = a_mem.get("vram_total_bytes", 0) / 2**30
+    changes.append(f"  VRAM: {bv:.1f} GiB -> {av:.1f} GiB")
+if b_mem.get("gtt_total_bytes") != a_mem.get("gtt_total_bytes"):
+    bg = b_mem.get("gtt_total_bytes", 0) / 2**30
+    ag = a_mem.get("gtt_total_bytes", 0) / 2**30
+    changes.append(f"  GTT:  {bg:.1f} GiB -> {ag:.1f} GiB")
+
+b_kern = before.get("kernel", {})
+a_kern = after.get("kernel", {})
+for param in ["param_iommu", "param_gttsize", "param_pages_limit"]:
+    bv = b_kern.get(param, "")
+    av = a_kern.get(param, "")
+    if bv != av:
+        changes.append(f"  {param}: '{bv}' -> '{av}'")
+
+bt = before.get("tuned_profile", "")
+at = after.get("tuned_profile", "")
+if bt != at:
+    changes.append(f"  tuned: {bt} -> {at}")
+
+if changes:
+    print("  Configuration changes:")
+    for c in changes:
+        print(c)
+else:
+    print("  No configuration changes detected")
+PYEOF
+fi
+
+# Compare results
+echo ""
+python3 - "$BEFORE_DIR/summary.json" "$AFTER_DIR/summary.json" << 'PYEOF'
+import sys, json
+
+with open(sys.argv[1]) as f:
+    before = json.load(f)
+with open(sys.argv[2]) as f:
+    after = json.load(f)
+
+# Build a lookup of tokens/sec keyed by (model, backend, test)
+def index_results(data):
+    """Return {(model, backend, test): tokens_per_sec}; a later duplicate key wins."""
+    return {
+        (row["model"], row["backend"], row["test"]): row["tokens_per_sec"]
+        for row in data.get("results", [])
+    }
+
+b_idx = index_results(before)
+a_idx = index_results(after)
+
+all_keys = sorted(set(b_idx.keys()) | set(a_idx.keys()))
+
+if not all_keys:
+    print("  No comparable results found.")
+    sys.exit(0)
+
+fmt = "  {:<18} {:<14} {:<7} {:>9} {:>9} {:>8}"
+print(fmt.format("Model", "Backend", "Test", "Before", "After", "Delta"))
+print("  " + "-" * 70)
+
+for key in all_keys:
+    model, backend, test = key
+    b_val = b_idx.get(key)
+    a_val = a_idx.get(key)
+    # A measured 0.0 t/s is still a value; only a run that is absent shows as "—"
+    b_str = f"{b_val:.1f}" if b_val is not None else "—"
+    a_str = f"{a_val:.1f}" if a_val is not None else "—"
+    # Pad the delta to the column width BEFORE colouring: format() counts ANSI escape bytes as visible characters
+    if b_val and a_val:
+        delta_pct = (a_val - b_val) / b_val * 100
+        if delta_pct > 0:
+            d_str = f"\033[32m{f'+{delta_pct:.1f}%':>8}\033[0m"
+        elif delta_pct < 0:
+            d_str = f"\033[31m{f'{delta_pct:.1f}%':>8}\033[0m"
+        else:
+            d_str = "0.0%"
+    else:
+        d_str = "—"
+
+    print(fmt.format(model[:18], backend[:14], test, b_str, a_str, d_str))
+
+print()
+PYEOF
diff --git a/scripts/benchmark/run-baseline.sh b/scripts/benchmark/run-baseline.sh
new file mode 100644
index 0000000..75d6531
--- /dev/null
+++ b/scripts/benchmark/run-baseline.sh
@@ -0,0 +1,223 @@
+#!/usr/bin/env bash
+# Capture pre-optimization baseline benchmark
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+MODEL_DIR="$(data_dir models)"
+TS="$(timestamp)"
+RESULT_DIR="$(data_dir baselines)/$TS"
+mkdir -p "$RESULT_DIR"
+
+REPS_STANDARD=5
+REPS_LONGCTX=3
+
+log_header "Baseline Benchmark Capture"
+log_info "Results will be saved to: $RESULT_DIR"
+
+# ── 1. Save system state (best-effort: a failed audit is reported, never aborts the run) ──
+log_info "Capturing system state..."
+bash "$SCRIPT_DIR/../audit/system-report.sh" --json > "$RESULT_DIR/system-state.json" 2>/dev/null || { log_warn "System state capture failed — continuing without system-state.json"; rm -f "$RESULT_DIR/system-state.json"; }
+
+# ── 2. Discover available toolboxes and models ──────────
+existing="$(detect_toolbox_names 2>/dev/null || true)"
+
+# Map toolbox names to llama-bench commands (same pattern as upstream)
+declare -A BENCH_PATHS=(
+    [llama-vulkan-radv]="/usr/sbin/llama-bench"
+    [llama-vulkan-amdvlk]="/usr/sbin/llama-bench"
+    [llama-rocm-6.4.4]="/usr/local/bin/llama-bench"
+    [llama-rocm-7.2]="/usr/local/bin/llama-bench"
+    [llama-rocm7-nightlies]="/usr/local/bin/llama-bench"
+)
+
+available_backends=()
+for tb in "${!BENCH_PATHS[@]}"; do
+    # -x -F: whole-line literal match, so the dots in e.g. llama-rocm-6.4.4 are not regex wildcards
+    if grep -qxF -- "$tb" <<< "$existing"; then
+        available_backends+=("$tb")
+        log_success "Backend: $tb"
+    fi
+done
+
+if (( ${#available_backends[@]} == 0 )); then
+    log_error "No toolbox backends found. Run: make benchmark-setup"
+    exit 1
+fi
+
+# Find models
+mapfile -t MODEL_PATHS < <(
+    find "$MODEL_DIR" -type f -name '*.gguf' \
+        \( -name '*-00001-of-*.gguf' -o -not -name '*-000*-of-*.gguf' \) \
+        | sort
+)
+
+if (( ${#MODEL_PATHS[@]} == 0 )); then
+    log_error "No GGUF models found in $MODEL_DIR. Run: make benchmark-setup"
+    exit 1
+fi
+
+log_info "Found ${#MODEL_PATHS[@]} model(s):"
+for p in "${MODEL_PATHS[@]}"; do
+    printf "  %s (%s)\n" "$(basename "$p")" "$(du -h "$p" | cut -f1)"
+done
+
+# ── 3. Start metric logging ─────────────────────────────
+METRICS_FILE="$RESULT_DIR/metrics.csv"
+bash "$SCRIPT_DIR/../monitor/log-metrics.sh" --output "$METRICS_FILE" --interval 2 &
+METRICS_PID=$!
+log_info "Metric logger started (PID: $METRICS_PID)"
+
+cleanup() {
+    # Best-effort shutdown of the background metric logger: signal it, then reap it
+    kill "$METRICS_PID" 2>/dev/null || :; wait "$METRICS_PID" 2>/dev/null || :
+}
+trap cleanup EXIT
+
+# ── 4. Run benchmarks ───────────────────────────────────
+for MODEL_PATH in "${MODEL_PATHS[@]}"; do
+    MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"
+
+    for BACKEND in "${available_backends[@]}"; do
+        BENCH_BIN="${BENCH_PATHS[$BACKEND]}"
+        BACKEND_SAFE="${BACKEND//[.-]/_}"
+
+        # Build environment args for ROCm backends
+        ENV_ARGS=()
+        if [[ "$BACKEND" == *rocm* ]]; then
+            ENV_ARGS=(env ROCBLAS_USE_HIPBLASLT=1)
+        fi
+
+        # Standard test (pp512 + tg128, default context)
+        OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log"
+        if [[ ! -s "$OUT" ]]; then
+            printf "\n${BOLD}>> [%s] %s — standard test${RESET}\n" "$BACKEND" "$MODEL_NAME"
+            CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
+                -ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1 -r "$REPS_STANDARD")
+
+            printf "  cmd: %s\n" "${CMD[*]}"
+            if "${CMD[@]}" > "$OUT" 2>&1; then
+                log_success "Standard test complete"
+                tail -5 "$OUT"
+            else
+                log_error "Standard test failed (exit $?)"
+                echo "FAILED" >> "$OUT"
+            fi
+        else
+            log_info "Skipping standard test (log exists): $OUT"
+        fi
+
+        # Long-context test (pp2048, tg32, ctx 32768)
+        OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
+        if [[ ! -s "$OUT_LC" ]]; then
+            printf "\n${BOLD}>> [%s] %s — long-context test${RESET}\n" "$BACKEND" "$MODEL_NAME"
+
+            UB_SIZE=2048
+            [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
+
+            CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
+                -ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1
+                -p 2048 -n 32 -d 32768 -ub "$UB_SIZE"
+                -r "$REPS_LONGCTX")
+
+            printf "  cmd: %s\n" "${CMD_LC[*]}"
+            if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
+                log_success "Long-context test complete"
+                tail -5 "$OUT_LC"
+            else
+                log_error "Long-context test failed (exit $?)"
+                echo "FAILED" >> "$OUT_LC"
+            fi
+        else
+            log_info "Skipping long-context test (log exists): $OUT_LC"
+        fi
+    done
+done
+
+# ── 5. Parse results into summary JSON ──────────────────
+log_info "Parsing results..."
+SUMMARY="$RESULT_DIR/summary.json"
+
+python3 - "$RESULT_DIR" > "$SUMMARY" << 'PYEOF'
+import sys, os, re, json
+from pathlib import Path
+
+result_dir = Path(sys.argv[1])
+results = []
+
+for logfile in sorted(result_dir.glob("*.log")):
+    content = logfile.read_text()
+    if "FAILED" in content:
+        continue
+
+    # Parse the pipe-delimited llama-bench table
+    for line in content.splitlines():
+        line = line.strip()
+        if not line.startswith("|") or "model" in line.lower() and "size" in line.lower():
+            continue
+        if "---" in line:
+            continue
+
+        # split("|") yields an empty string before the first and after the last pipe
+        parts = [p.strip() for p in line.split("|")]
+        if len(parts) < 8 or parts[-1] != "":
+            continue
+        # Columns: | model | size | params | backend | <option columns…> | test | t/s |
+        # llama-bench prints one option column per non-default setting (e.g. n_ubatch
+        # for -ub 2048), so test and t/s are read from the right-hand end of the row.
+        try:
+            test_type = parts[-3]
+            ts_raw = parts[-2]
+            if not test_type or not ts_raw:
+                continue
+            # Parse "548.18 +/- 1.59" or just "548.18"
+            ts_match = re.match(r'([\d.]+)', ts_raw)
+            if not ts_match:
+                continue
+            results.append({
+                "file": logfile.name,
+                "model": parts[1],
+                "size": parts[2],
+                "backend": parts[4],
+                "test": test_type,
+                "tokens_per_sec": float(ts_match.group(1)),
+                "raw": ts_raw,
+            })
+        except (ValueError, IndexError):
+            continue
+
+print(json.dumps({"results": results}, indent=2))
+PYEOF
+
+# ── 6. Display summary ──────────────────────────────────
+log_header "Baseline Results"
+
+python3 - "$SUMMARY" << 'PYEOF'
+import sys, json
+
+with open(sys.argv[1]) as f:
+    data = json.load(f)
+
+if not data["results"]:
+    print("  No results parsed. Check log files for errors.")
+    sys.exit(0)
+
+# Print table
+fmt = "  {:<20} {:<16} {:<8} {:>10}"
+print(fmt.format("Model", "Backend", "Test", "t/s"))
+print("  " + "-" * 58)
+for r in data["results"]:
+    print(fmt.format(
+        r["model"][:20],
+        r["backend"][:16],
+        r["test"],
+        f"{r['tokens_per_sec']:.2f}"
+    ))
+PYEOF
+
+echo ""
+log_success "Baseline saved to: $RESULT_DIR"
+log_info "Files: system-state.json, summary.json, metrics.csv, *.log"
+log_info "Compare later with: bin/benchmark compare $RESULT_DIR <new-run-dir>"
diff --git a/scripts/benchmark/run-suite.sh b/scripts/benchmark/run-suite.sh
new file mode 100644
index 0000000..e996cb8
--- /dev/null
+++ b/scripts/benchmark/run-suite.sh
@@ -0,0 +1,194 @@
+#!/usr/bin/env bash
+# Full benchmark suite — run all backends × models with tagging
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+MODEL_DIR="$(data_dir models)"
+TAG="run"
+BACKENDS_FILTER=""
+MODELS_FILTER=""
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --tag|-t)      TAG="$2"; shift 2 ;;
+        --backends|-b) BACKENDS_FILTER="$2"; shift 2 ;;
+        --models|-m)   MODELS_FILTER="$2"; shift 2 ;;
+        *) shift ;;
+    esac
+done
+
+TS="$(timestamp)"
+RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
+mkdir -p "$RESULT_DIR"
+
+REPS_STANDARD=5
+REPS_LONGCTX=3
+
+log_header "Benchmark Suite: $TAG"
+log_info "Results: $RESULT_DIR"
+
+# Save system state (best-effort: a failed audit is reported but must not abort the benchmark run)
+bash "$SCRIPT_DIR/../audit/system-report.sh" --json > "$RESULT_DIR/system-state.json" 2>/dev/null || { log_warn "System state capture failed — continuing without system-state.json"; rm -f "$RESULT_DIR/system-state.json"; }
+
+# Discover backends
+existing="$(detect_toolbox_names 2>/dev/null || true)"
+
+declare -A BENCH_PATHS=(
+    [llama-vulkan-radv]="/usr/sbin/llama-bench"
+    [llama-vulkan-amdvlk]="/usr/sbin/llama-bench"
+    [llama-rocm-6.4.4]="/usr/local/bin/llama-bench"
+    [llama-rocm-7.2]="/usr/local/bin/llama-bench"
+    [llama-rocm7-nightlies]="/usr/local/bin/llama-bench"
+)
+
+available_backends=()
+for tb in "${!BENCH_PATHS[@]}"; do
+    # Literal matching (-F): toolbox names contain dots; -x also requires the whole listed name to match
+    if grep -qxF -- "$tb" <<< "$existing" \
+        && { [[ -z "$BACKENDS_FILTER" ]] || grep -qF -- "$tb" <<< "$BACKENDS_FILTER"; }; then
+        available_backends+=("$tb")
+    fi
+done
+
+if (( ${#available_backends[@]} == 0 )); then
+    log_error "No matching backends. Run: make benchmark-setup"
+    exit 1
+fi
+log_info "Backends: ${available_backends[*]}"
+
+# Find models
+mapfile -t MODEL_PATHS < <(
+    find "$MODEL_DIR" -type f -name '*.gguf' \
+        \( -name '*-00001-of-*.gguf' -o -not -name '*-000*-of-*.gguf' \) \
+        | sort
+)
+
+if [[ -n "$MODELS_FILTER" ]]; then
+    filtered=()
+    for p in "${MODEL_PATHS[@]}"; do
+        name="$(basename "$p")"
+        if echo "$MODELS_FILTER" | tr ',' '\n' | grep -qi "$name"; then
+            filtered+=("$p")
+        fi
+    done
+    MODEL_PATHS=("${filtered[@]}")
+fi
+
+if (( ${#MODEL_PATHS[@]} == 0 )); then
+    log_error "No models found. Run: make benchmark-setup"
+    exit 1
+fi
+log_info "Models: ${#MODEL_PATHS[@]}"
+
+# Start metric logging; the EXIT trap stops and reaps the logger, ignoring failures like cleanup() in run-baseline.sh
+METRICS_FILE="$RESULT_DIR/metrics.csv"
+bash "$SCRIPT_DIR/../monitor/log-metrics.sh" --output "$METRICS_FILE" --interval 2 &
+METRICS_PID=$!
+trap 'kill "$METRICS_PID" 2>/dev/null || true; wait "$METRICS_PID" 2>/dev/null || true' EXIT
+
+# Run benchmarks (same logic as run-baseline.sh)
+for MODEL_PATH in "${MODEL_PATHS[@]}"; do
+    MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"
+
+    for BACKEND in "${available_backends[@]}"; do
+        BENCH_BIN="${BENCH_PATHS[$BACKEND]}"
+        BACKEND_SAFE="${BACKEND//[.-]/_}"
+
+        ENV_ARGS=()
+        [[ "$BACKEND" == *rocm* ]] && ENV_ARGS=(env ROCBLAS_USE_HIPBLASLT=1)
+
+        # Standard test
+        OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log"
+        if [[ ! -s "$OUT" ]]; then
+            printf "\n${BOLD}>> [%s] %s — standard${RESET}\n" "$BACKEND" "$MODEL_NAME"
+            CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
+                -ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1 -r "$REPS_STANDARD")
+            if "${CMD[@]}" > "$OUT" 2>&1; then
+                log_success "Done"; tail -3 "$OUT"
+            else
+                log_error "Failed"; echo "FAILED" >> "$OUT"
+            fi
+        fi
+
+        # Long-context test
+        OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
+        if [[ ! -s "$OUT_LC" ]]; then
+            printf "\n${BOLD}>> [%s] %s — longctx${RESET}\n" "$BACKEND" "$MODEL_NAME"
+            UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
+            CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
+                -ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1
+                -p 2048 -n 32 -d 32768 -ub "$UB_SIZE" -r "$REPS_LONGCTX")
+            if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
+                log_success "Done"; tail -3 "$OUT_LC"
+            else
+                log_error "Failed"; echo "FAILED" >> "$OUT_LC"
+            fi
+        fi
+    done
+done
+
+# Parse results
+SUMMARY="$RESULT_DIR/summary.json"
+# Parse llama-bench log files into summary JSON
+python3 - "$RESULT_DIR" > "$SUMMARY" << 'PYEOF'
+import sys, os, re, json
+from pathlib import Path
+
+result_dir = Path(sys.argv[1])
+results = []
+
+for logfile in sorted(result_dir.glob("*.log")):
+    content = logfile.read_text()
+    if "FAILED" in content:
+        continue
+    for line in content.splitlines():
+        line = line.strip()
+        if not line.startswith("|") or "model" in line.lower() and "size" in line.lower():
+            continue
+        if "---" in line:
+            continue
+        parts = [p.strip() for p in line.split("|")]
+        if len(parts) < 8 or parts[-1] != "":
+            continue
+        try:
+            # test and t/s are the last two columns; llama-bench inserts extra option columns (e.g. n_ubatch) before them
+            test_type, ts_raw = parts[-3], parts[-2]
+            ts_match = re.match(r'([\d.]+)', ts_raw)
+            if not ts_match:
+                continue
+            results.append({
+                "file": logfile.name,
+                "model": parts[1],
+                "size": parts[2],
+                "backend": parts[4],
+                "test": test_type,
+                "tokens_per_sec": float(ts_match.group(1)),
+                "raw": ts_raw,
+            })
+        except (ValueError, IndexError):
+            continue
+
+print(json.dumps({"results": results}, indent=2))
+PYEOF
+
+log_header "Results"
+python3 - "$SUMMARY" << 'PYEOF'
+import sys, json
+with open(sys.argv[1]) as f:
+    data = json.load(f)
+if not data["results"]:
+    print("  No results parsed.")
+    sys.exit(0)
+fmt = "  {:<20} {:<16} {:<8} {:>10}"
+print(fmt.format("Model", "Backend", "Test", "t/s"))
+print("  " + "-" * 58)
+for r in data["results"]:
+    print(fmt.format(r["model"][:20], r["backend"][:16], r["test"], f"{r['tokens_per_sec']:.2f}"))
+PYEOF
+
+echo ""
+log_success "Results saved to: $RESULT_DIR"
diff --git a/scripts/benchmark/setup.sh b/scripts/benchmark/setup.sh
new file mode 100644
index 0000000..fb56c49
--- /dev/null
+++ b/scripts/benchmark/setup.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+# Benchmark setup — ensure toolboxes and test models are ready
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+
+TOOLBOXES_REPO="/data/workspace/projects/HomeLab/strix-halo-toolboxes/amd-strix-halo-llamacpp-toolboxes"
+MODEL_DIR="$(data_dir models)"
+
+log_header "Benchmark Setup"
+
+# ── 1. Check toolbox containers ──────────────────────────
+log_info "Checking toolbox containers..."
+
+# Minimum required: vulkan-radv (most stable)
+REQUIRED_TOOLBOXES=("llama-vulkan-radv")
+OPTIONAL_TOOLBOXES=("llama-rocm-6.4.4" "llama-rocm-7.2" "llama-vulkan-amdvlk")
+
+existing=$(detect_toolbox_names 2>/dev/null || true)
+missing=()
+
+for tb in "${REQUIRED_TOOLBOXES[@]}"; do
+    if echo "$existing" | grep -q "^${tb}$"; then
+        log_success "Toolbox: $tb"
+    else
+        missing+=("$tb")
+        log_warn "Toolbox missing: $tb"
+    fi
+done
+
+for tb in "${OPTIONAL_TOOLBOXES[@]}"; do
+    if echo "$existing" | grep -q "^${tb}$"; then
+        log_success "Toolbox: $tb (optional)"
+    else
+        log_info "Toolbox not present: $tb (optional)"
+    fi
+done
+
+if (( ${#missing[@]} > 0 )); then
+    log_info "Need to create required toolboxes."
+    if [[ -d "$TOOLBOXES_REPO" ]]; then
+        log_info "Found toolboxes repo at: $TOOLBOXES_REPO"
+        if confirm "Create missing toolboxes using refresh-toolboxes.sh?"; then
+            for tb in "${missing[@]}"; do
+                log_info "Creating $tb..."
+                bash "$TOOLBOXES_REPO/refresh-toolboxes.sh" "$tb"
+            done
+        fi
+    else
+        log_error "Toolboxes repo not found at: $TOOLBOXES_REPO"
+        log_info "Clone it: git clone https://github.com/kyuz0/amd-strix-halo-toolboxes"
+        log_info "Then re-run this setup."
+        exit 1
+    fi
+fi
+
+# ── 2. Verify GPU access inside toolboxes ────────────────
+log_info "Verifying GPU access in toolboxes..."
+for tb in "${REQUIRED_TOOLBOXES[@]}"; do
+    # Re-query rather than reuse $existing: step 1 may have just created this toolbox. -x -F = exact name.
+    grep -qxF -- "$tb" <<< "$(detect_toolbox_names 2>/dev/null || true)" || continue
+    if toolbox run -c "$tb" -- llama-cli --list-devices 2>&1 | grep -qi "gpu\|vulkan\|rocm"; then
+        log_success "GPU accessible in $tb"
+    else
+        log_warn "GPU may not be accessible in $tb — check device mappings"
+    fi
+done
+
+# ── 3. Check for test models ────────────────────────────
+log_info "Checking for test models in $MODEL_DIR..."
+
+model_count=$(find "$MODEL_DIR" -name "*.gguf" 2>/dev/null | wc -l)
+if (( model_count > 0 )); then
+    log_success "Found $model_count model(s):"
+    find "$MODEL_DIR" -name "*.gguf" | while read -r f; do
+        size=$(du -h "$f" | cut -f1)
+        printf "  %s (%s)\n" "$(basename "$f")" "$size"
+    done
+else
+    log_warn "No GGUF models found in $MODEL_DIR"
+    log_info "Download a test model. Example:"
+    echo ""
+    echo "  # Small (4B, ~3 GB):"
+    echo "  huggingface-cli download Qwen/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf \\"
+    echo "    --local-dir $MODEL_DIR"
+    echo ""
+    echo "  # Medium (14B, ~9 GB):"
+    echo "  huggingface-cli download Qwen/Qwen3-14B-GGUF Qwen3-14B-Q4_K_M.gguf \\"
+    echo "    --local-dir $MODEL_DIR"
+    echo ""
+
+    if is_cmd huggingface-cli; then
+        if confirm "Download Qwen3-4B Q4_K_M (~3 GB) as test model?"; then
+            huggingface-cli download Qwen/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf \
+                --local-dir "$MODEL_DIR"
+            log_success "Model downloaded"
+        fi
+    else
+        log_info "Install huggingface-cli: pip install huggingface_hub[cli]"
+    fi
+fi
+
+log_header "Setup Complete"
+log_info "Run 'make benchmark-baseline' to capture your baseline."
diff --git a/scripts/monitor/dashboard.sh b/scripts/monitor/dashboard.sh
new file mode 100644
index 0000000..06c1c96
--- /dev/null
+++ b/scripts/monitor/dashboard.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# Tmux-based monitoring dashboard
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+
+SESSION="strix-monitor"
+SIMPLE=false
+WITH_LOG=false
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --simple|-s) SIMPLE=true; shift ;;
+        --with-logging|-l) WITH_LOG=true; shift ;;
+        *) shift ;;
+    esac
+done
+
+# Simple mode: just launch amdgpu_top
+if $SIMPLE; then
+    if is_cmd amdgpu_top; then
+        exec amdgpu_top
+    elif is_cmd nvtop; then
+        log_warn "amdgpu_top not found, falling back to nvtop"
+        exec nvtop
+    else
+        log_error "No GPU monitor installed. Run: make monitor-install"
+        exit 1
+    fi
+fi
+
+# Full dashboard requires tmux
+if ! is_cmd tmux; then
+    log_error "tmux is required for dashboard mode. Run: make monitor-install"
+    exit 1
+fi
+
+# Pick GPU monitor
+GPU_MON="nvtop"
+if is_cmd amdgpu_top; then
+    GPU_MON="amdgpu_top"
+fi
+
+# Pick system monitor
+SYS_MON="htop"
+if is_cmd btop; then
+    SYS_MON="btop"
+elif ! is_cmd htop; then
+    SYS_MON="top"
+fi
+
+# Kill existing session if running
+tmux kill-session -t "$SESSION" 2>/dev/null || true
+
+# Start background logging if requested
+LOG_CMD="echo Metric logging not active. Use --with-logging to enable.; read -r"
+LOG_PID=""
+if $WITH_LOG; then
+    LOG_FILE="$(data_dir logs)/metrics-$(timestamp).csv"
+    bash "$SCRIPT_DIR/log-metrics.sh" --output "$LOG_FILE" &
+    LOG_PID=$!
+    LOG_CMD="tail -f \"$LOG_FILE\""
+    log_info "Metric logger started (PID: $LOG_PID) → $LOG_FILE"
+fi
+
+# EXIT handler: stop and reap the background metric logger, if one was started
+cleanup() {
+    # Nothing to do unless --with-logging launched a logger
+    [[ -n "$LOG_PID" ]] || return 0
+    kill "$LOG_PID" 2>/dev/null || true
+    wait "$LOG_PID" 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# Create tmux layout (-f makes the log pane span the full window width instead of splitting only the right pane)
+# +--------------------+--------------------+
+# |   GPU monitor      |   System monitor   |
+# |                    |                    |
+# +--------------------+--------------------+
+# |   Metrics log tail / status             |
+# +-----------------------------------------+
+tmux new-session -d -s "$SESSION" -x "$(tput cols 2>/dev/null || echo 120)" -y "$(tput lines 2>/dev/null || echo 40)" "$GPU_MON"
+tmux split-window -t "$SESSION" -h "$SYS_MON"
+tmux split-window -t "$SESSION" -v -f -p 20 "$LOG_CMD"
+tmux select-pane -t "$SESSION:0.0"
+
+log_info "Dashboard started. Attach with: tmux attach -t $SESSION"
+log_info "Detach with Ctrl+B then D. Kill with: tmux kill-session -t $SESSION"
+tmux attach -t "$SESSION"
diff --git a/scripts/monitor/install-tools.sh b/scripts/monitor/install-tools.sh
new file mode 100644
index 0000000..23eb1f2
--- /dev/null
+++ b/scripts/monitor/install-tools.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+# Install monitoring tools for Strix Halo
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+
+log_header "Monitoring Tools Installer"
+
+# ── amdgpu_top (most important) ─────────────────────────
+AMDGPU_TOP_VERSION="0.11.2"
+RPM_URL="https://github.com/Umio-Yasuno/amdgpu_top/releases/download/v${AMDGPU_TOP_VERSION}/amdgpu_top-${AMDGPU_TOP_VERSION}-1.x86_64.rpm"
+if is_cmd amdgpu_top; then
+    log_success "amdgpu_top already installed: $(amdgpu_top --version 2>&1 | head -1)"
+else
+    log_info "Installing amdgpu_top (best AMD GPU monitor)..."
+    installed=false
+
+    # Method 1: Install RPM from GitHub releases (fastest, works on Fedora).
+    # The download goes into a private mktemp directory: a fixed name in world-writable /tmp could be swapped by another local user before the root install.
+    log_info "Downloading pre-built RPM from GitHub releases..."
+    RPM_DIR="$(mktemp -d 2>/dev/null || true)"
+    RPM_FILE="$RPM_DIR/amdgpu_top-${AMDGPU_TOP_VERSION}.rpm"
+    if [[ -n "$RPM_DIR" ]] && curl -fsSL -o "$RPM_FILE" "$RPM_URL" 2>/dev/null; then
+        if sudo dnf install -y "$RPM_FILE" 2>&1; then
+            installed=true
+            log_success "amdgpu_top installed from RPM"
+        else
+            log_warn "RPM install failed"
+        fi
+    else
+        log_warn "RPM download failed"
+    fi
+    # Clean up on every path (the fixed-name file used to linger after a failed install)
+    if [[ -n "$RPM_DIR" ]]; then rm -rf "$RPM_DIR" || true; fi
+
+    # Method 2: Try dnf repos
+    if ! $installed; then
+        log_info "Trying dnf repos..."
+        if sudo dnf install -y amdgpu_top 2>/dev/null; then
+            installed=true
+            log_success "amdgpu_top installed via dnf"
+        fi
+    fi
+
+    # Method 3: cargo (if available)
+    if ! $installed && is_cmd cargo; then
+        log_info "Building from source via cargo..."
+        if cargo install amdgpu_top 2>&1; then
+            installed=true
+            log_success "amdgpu_top installed via cargo"
+        else
+            log_warn "cargo install failed"
+        fi
+    fi
+
+    if ! $installed; then
+        log_warn "Could not install amdgpu_top automatically."
+        log_info "Manual options:"
+        log_info "  1. Download RPM:     curl -LO $RPM_URL && sudo dnf install ./amdgpu_top-*.rpm"
+        log_info "  2. Download AppImage: https://github.com/Umio-Yasuno/amdgpu_top/releases/latest"
+    fi
+fi
+
+# ── btop ─────────────────────────────────────────────────
+if is_cmd btop; then
+    log_success "btop already installed"
+else
+    log_info "Installing btop..."
+    if sudo dnf install -y btop 2>&1; then
+        log_success "btop installed"
+    else
+        log_warn "Could not install btop via dnf"
+    fi
+fi
+
+# ── tmux (needed for dashboard) ──────────────────────────
+if is_cmd tmux; then
+    log_success "tmux already installed"
+else
+    log_info "Installing tmux..."
+    if sudo dnf install -y tmux 2>&1; then
+        log_success "tmux installed"
+    else
+        log_warn "Could not install tmux via dnf"
+    fi
+fi
+
+# ── Verify existing tools ───────────────────────────────
+log_header "Monitoring Tools Status"
+for tool in amdgpu_top nvtop btop amd-smi rocm-smi tmux; do
+    if is_cmd "$tool"; then
+        log_success "$tool"
+    else
+        log_warn "$tool — not installed"
+    fi
+done
diff --git a/scripts/monitor/log-metrics.sh b/scripts/monitor/log-metrics.sh
new file mode 100644
index 0000000..7b48360
--- /dev/null
+++ b/scripts/monitor/log-metrics.sh
@@ -0,0 +1,127 @@
+#!/usr/bin/env bash
+# Background metric collector — samples GPU and system stats to CSV
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+
+INTERVAL=2
+OUTPUT=""
+DURATION=0  # 0 = indefinite
+
+# ${2?...} aborts with a readable message when a flag is given without its value
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --output|-o)   OUTPUT="${2?$1 requires a value}"; shift 2 ;;
+        --interval|-i) INTERVAL="${2?$1 requires a value}"; shift 2 ;;
+        --duration|-d) DURATION="${2?$1 requires a value}"; shift 2 ;;
+        --help|-h)
+            echo "Usage: log-metrics.sh [--output FILE] [--interval SECS] [--duration SECS]"
+            exit 0 ;;
+        *) log_warn "Unknown argument: $1"; shift ;;
+    esac
+done
+
+# Validate numeric args, then force base 10 so "08" is not rejected later as bad octal
+[[ "$INTERVAL" =~ ^[0-9]+$ ]] || { log_error "--interval must be a non-negative integer"; exit 1; }
+[[ "$DURATION" =~ ^[0-9]+$ ]] || { log_error "--duration must be a non-negative integer"; exit 1; }
+INTERVAL=$(( 10#$INTERVAL )); DURATION=$(( 10#$DURATION ))
+
+[[ -n "$OUTPUT" ]] || OUTPUT="$(data_dir logs)/metrics-$(timestamp).csv"
+
+mkdir -p "$(dirname "$OUTPUT")"
+
+# Cache sysfs paths once (avoid re-globbing every iteration)
+SYSFS_GPU_BUSY="$GPU_SYSFS/gpu_busy_percent"
+SYSFS_VRAM_USED="$GPU_SYSFS/mem_info_vram_used"
+SYSFS_GTT_USED="$GPU_SYSFS/mem_info_gtt_used"
+SYSFS_TEMP=""; SYSFS_POWER=""
+for f in "$GPU_SYSFS"/hwmon/hwmon*/temp1_input; do
+    [[ -f "$f" ]] && SYSFS_TEMP="$f" && break
+done
+# Prefer the averaged power reading, else fall back to power1_input (also microwatts) — NOTE(review): some amdgpu parts appear to expose only the latter; confirm on target hardware
+for f in "$GPU_SYSFS"/hwmon/hwmon*/power1_average "$GPU_SYSFS"/hwmon/hwmon*/power1_input; do
+    [[ -f "$f" ]] && SYSFS_POWER="$f" && break
+done
+
+# Write CSV header
+echo "timestamp,gpu_busy_pct,vram_used_mib,gtt_used_mib,gpu_temp_c,gpu_power_w,cpu_pct,ram_used_mib" > "$OUTPUT"
+
+log_info "Logging metrics every ${INTERVAL}s → $OUTPUT"
+[[ $DURATION -gt 0 ]] && log_info "Will stop after ${DURATION}s"
+
+start_time=$SECONDS
+stopped=false
+
+cleanup() {
+    $stopped && return
+    stopped=true
+    local lines
+    lines=$(( $(wc -l < "$OUTPUT") - 1 ))
+    log_info "Metric logger stopped. $lines samples in $OUTPUT"
+}
+trap cleanup EXIT
+
+# Read /proc/stat fields into variables using bash builtins
+read_cpu_stat() {
+    local line
+    read -r line < /proc/stat
+    # "cpu  user nice system idle iowait irq softirq steal"
+    set -- $line
+    shift  # drop "cpu"
+    CPU_TOTAL=$(( $1 + $2 + $3 + $4 + $5 + $6 + $7 + ${8:-0} ))
+    CPU_IDLE=$4
+}
+
+while true; do
+    printf -v ts '%(%Y-%m-%d %H:%M:%S)T' -1
+    # GPU metrics — direct reads, no subshells. Each read sits in { …; } 2>/dev/null because a trailing
+    # 2>/dev/null is applied only after "<" has failed, so a missing sysfs file logged an error every sample.
+    { read -r gpu_busy < "$SYSFS_GPU_BUSY"; } 2>/dev/null || gpu_busy=0
+    { read -r vram_bytes < "$SYSFS_VRAM_USED"; } 2>/dev/null || vram_bytes=0
+    { read -r gtt_bytes < "$SYSFS_GTT_USED"; } 2>/dev/null || gtt_bytes=0
+    { read -r temp_mc < "$SYSFS_TEMP"; } 2>/dev/null || temp_mc=0
+    { read -r power_uw < "$SYSFS_POWER"; } 2>/dev/null || power_uw=0
+
+    vram_mib=$(( vram_bytes / 1048576 ))
+    gtt_mib=$(( gtt_bytes / 1048576 ))
+    gpu_temp_c=$(( temp_mc / 1000 )).$(( (temp_mc % 1000) / 100 ))
+    gpu_power_w=$(( power_uw / 1000000 )).$(( (power_uw % 1000000) / 100000 ))
+
+    # CPU usage (snapshot delta)
+    read_cpu_stat
+    prev_total=$CPU_TOTAL
+    prev_idle=$CPU_IDLE
+    sleep 0.1
+    read_cpu_stat
+    delta_total=$(( CPU_TOTAL - prev_total ))
+    delta_idle=$(( CPU_IDLE - prev_idle ))
+    if (( delta_total > 0 )); then
+        cpu_pct=$(( (delta_total - delta_idle) * 1000 / delta_total ))
+        # Format N as N/10 . N%10, handling single-digit values (e.g., 5 → 0.5)
+        cpu_pct_fmt="$(( cpu_pct / 10 )).$(( cpu_pct % 10 ))"
+    else
+        cpu_pct_fmt="0.0"
+    fi
+
+    # RAM used (bash builtins only)
+    local_mem_total=0
+    local_mem_avail=0
+    while IFS=': ' read -r key val _; do
+        case "$key" in
+            MemTotal)     local_mem_total=$val ;;
+            MemAvailable) local_mem_avail=$val; break ;;
+        esac
+    done < /proc/meminfo
+    ram_used_mib=$(( (local_mem_total - local_mem_avail) / 1024 ))
+
+    echo "$ts,$gpu_busy,$vram_mib,$gtt_mib,$gpu_temp_c,$gpu_power_w,$cpu_pct_fmt,$ram_used_mib" >> "$OUTPUT"
+
+    # Check duration
+    if (( DURATION > 0 && SECONDS - start_time >= DURATION )); then
+        break
+    fi
+
+    sleep "$INTERVAL"
+done
diff --git a/scripts/optimize/kernel-params.sh b/scripts/optimize/kernel-params.sh
new file mode 100644
index 0000000..e81e79a
--- /dev/null
+++ b/scripts/optimize/kernel-params.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+# Configure kernel boot parameters for unified memory optimization
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+GRUB_FILE="/etc/default/grub"
+
+log_header "Kernel Boot Parameter Optimization"
+
+# ── Check root early ────────────────────────────────────
+if [[ $EUID -ne 0 ]]; then
+    log_error "This script requires root. Re-run with: sudo make optimize-kernel"
+    exit 1
+fi
+
+# ── Show current state ───────────────────────────────────
+log_info "Current kernel command line:"
+printf "  ${DIM}%s${RESET}\n" "$(cat /proc/cmdline)"
+echo ""
+
+param_iommu="$(detect_kernel_param 'iommu')"
+param_gttsize="$(detect_gttsize_param)"
+param_pages="$(detect_pages_limit_param)"
+
+rec_gttsize="$(recommended_gttsize_mib)"
+rec_pages="$(recommended_pages_limit)"
+
+# ── Check what's needed ──────────────────────────────────
+needs_change=false
+# Describe each current value once; an empty description falls back to "missing" below
+iommu_now="${param_iommu:+current: $param_iommu}"
+gtt_now="${param_gttsize:+current: $param_gttsize MiB}"
+pages_now="${param_pages:+current: $param_pages}"
+log_info "Parameter status:"
+
+if [[ "$param_iommu" == "pt" ]]; then
+    print_status pass "iommu=pt" "already set"
+else
+    print_status fail "iommu=pt" "${iommu_now:-missing}"
+    needs_change=true
+fi
+if [[ -n "$param_gttsize" ]] && (( param_gttsize >= rec_gttsize )); then
+    print_status pass "amdgpu.gttsize" "$gtt_now"
+else
+    print_status fail "amdgpu.gttsize" "${gtt_now:-missing}, recommended: $rec_gttsize MiB (~$(human_mib "$rec_gttsize"))"
+    needs_change=true
+fi
+if [[ -n "$param_pages" ]] && (( param_pages >= rec_pages )); then
+    print_status pass "ttm.pages_limit" "$pages_now"
+else
+    print_status fail "ttm.pages_limit" "${pages_now:-missing}, recommended: $rec_pages"
+    needs_change=true
+fi
+if [[ "$needs_change" == false ]]; then
+    echo ""
+    log_success "All kernel parameters are already optimal!"
+    exit 0
+fi
+
+# ── Explain what we're doing ─────────────────────────────
+echo ""
+log_info "These parameters enable unified memory for the integrated GPU:"
+echo "  iommu=pt              IOMMU passthrough — reduces memory access latency"
+echo "  amdgpu.gttsize=$rec_gttsize  GPU can dynamically access ~$(human_mib "$rec_gttsize") system RAM"
+echo "  ttm.pages_limit=$rec_pages  Pin limit for GPU memory pages ($(human_mib "$rec_gttsize") in 4K pages)"
+echo ""
+
+# ── Apply changes ────────────────────────────────────────
+if ! confirm "Apply these kernel parameters to GRUB?"; then
+    log_info "Skipped. You can apply manually by editing $GRUB_FILE"
+    exit 0
+fi
+
+# Backup
+BACKUP_DIR="$(data_dir backups)"
+backup_file="$BACKUP_DIR/grub-$(timestamp).bak"
+cp "$GRUB_FILE" "$backup_file"
+log_success "GRUB backup saved: $backup_file"
+
+# Parse current GRUB_CMDLINE_LINUX using Python (data via env vars, not interpolation)
+current_cmdline="$(GRUB_PATH="$GRUB_FILE" python3 -c '
+import re, os
+with open(os.environ["GRUB_PATH"]) as f:
+    for line in f:
+        m = re.match(r"^GRUB_CMDLINE_LINUX=\"(.*)\"", line)
+        if m:
+            print(m.group(1))
+            raise SystemExit(0)
+print("")
+')"
+
+# Remove any existing values of these params, then tidy the spacing — all in sed.
+# (Spacing was previously cleaned with xargs, which also strips quotes and
+# backslashes and aborts the script on an unmatched quote in the command line.)
+new_cmdline="$(sed -E \
+    -e 's/\b(iommu|amd_iommu|amdgpu\.gttsize|ttm\.pages_limit)=[^ ]*//g' \
+    -e 's/[[:space:]]+/ /g' \
+    -e 's/^ //' \
+    -e 's/ $//' <<< "$current_cmdline")"
+
+# Add new params (no stray leading space when the existing command line is empty)
+new_cmdline="${new_cmdline:+$new_cmdline }iommu=pt amdgpu.gttsize=$rec_gttsize ttm.pages_limit=$rec_pages"
+
+log_info "GRUB_CMDLINE_LINUX change:"
+printf "  ${RED}Before:${RESET} %s\n" "$current_cmdline"
+printf "  ${GREEN}After:${RESET}  %s\n" "$new_cmdline"
+echo ""
+
+if ! confirm "Write this change?"; then
+    log_info "Aborted. Backup remains at: $backup_file"
+    exit 0
+fi
+
+# Apply using Python (all data via env vars — no shell interpolation into Python code).
+# A callable replacement keeps backslashes in the value literal; if the file has no GRUB_CMDLINE_LINUX line, one is appended.
+GRUB_PATH="$GRUB_FILE" NEW_CMDLINE="$new_cmdline" python3 -c '
+import re, os
+grub_path = os.environ["GRUB_PATH"]
+new_line = "GRUB_CMDLINE_LINUX=\"" + os.environ["NEW_CMDLINE"] + "\""
+with open(grub_path) as f:
+    content, hits = re.subn(r"^GRUB_CMDLINE_LINUX=.*", lambda _m: new_line, f.read(), count=1, flags=re.MULTILINE)
+with open(grub_path, "w") as f:
+    f.write(content if hits else content.rstrip("\n") + "\n" + new_line + "\n")
+'
+log_success "GRUB config updated"
+
+# Regenerate GRUB — prefer grubby on modern Fedora (BLS), fall back to grub2-mkconfig
+log_info "Regenerating boot configuration..."
+boot_args="iommu=pt amdgpu.gttsize=$rec_gttsize ttm.pages_limit=$rec_pages"
+if is_cmd grubby; then
+    grubby --update-kernel=ALL --args="$boot_args"
+    log_success "Boot entries updated via grubby"
+elif [[ -d /boot/grub2 ]]; then
+    grub2-mkconfig -o /boot/grub2/grub.cfg
+    log_success "GRUB regenerated via grub2-mkconfig"
+elif [[ -d /boot/grub ]]; then
+    grub-mkconfig -o /boot/grub/grub.cfg
+    log_success "GRUB regenerated via grub-mkconfig"
+else
+    log_error "Could not find grubby or grub config directory. Regenerate manually."
+    exit 1
+fi
+echo ""
+log_warn "REBOOT REQUIRED for kernel parameters to take effect."
+log_info "After reboot, verify with: make audit"
diff --git a/scripts/optimize/rollback.sh b/scripts/optimize/rollback.sh
new file mode 100644
index 0000000..5eeb758
--- /dev/null
+++ b/scripts/optimize/rollback.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# Rollback optimization changes
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+
+GRUB_FILE="/etc/default/grub"
+BACKUP_DIR="$(data_dir backups)"
+
+log_header "Rollback Optimizations"
+
+# ── 1. GRUB rollback ────────────────────────────────────
+log_info "GRUB backups:"
+mapfile -t grub_backups < <(find "$BACKUP_DIR" -name 'grub-*.bak' -print 2>/dev/null | sort -r)
+if (( ${#grub_backups[@]} == 0 )); then
+    log_info "  No GRUB backups found"
+else
+    for i in "${!grub_backups[@]}"; do
+        printf "  [%d] %s\n" "$i" "${grub_backups[$i]}"
+    done
+    echo ""
+    if confirm "Restore most recent GRUB backup?"; then
+        require_root
+        backup="${grub_backups[0]}"
+        cp "$backup" "$GRUB_FILE"
+        log_success "GRUB restored from: $backup"
+        log_info "Regenerating boot configuration..."
+        if is_cmd grubby; then
+            # On BLS systems, also need to remove args via grubby. A failure is reported instead of logged as success: the args would still be active.
+            if grubby --update-kernel=ALL --remove-args="iommu amdgpu.gttsize ttm.pages_limit" 2>/dev/null; then
+                log_success "Boot entries updated via grubby"
+            else
+                log_warn "grubby failed — boot entries may still carry iommu/amdgpu.gttsize/ttm.pages_limit"
+            fi
+        elif [[ -d /boot/grub2 ]]; then
+            grub2-mkconfig -o /boot/grub2/grub.cfg
+            log_success "GRUB regenerated via grub2-mkconfig"
+        elif [[ -d /boot/grub ]]; then
+            grub-mkconfig -o /boot/grub/grub.cfg
+            log_success "GRUB regenerated via grub-mkconfig"
+        else
+            log_error "Could not find grubby or grub config directory. Regenerate manually."
+        fi
+        log_warn "Reboot required for changes to take effect."
+    fi
+fi
+
+# ── 2. Tuned profile rollback ───────────────────────────
+prev_profile_file="$BACKUP_DIR/tuned-previous-profile.txt"
+if [[ ! -f "$prev_profile_file" ]]; then
+    log_info "No previous tuned profile saved"
+else
+    prev_profile="$(< "$prev_profile_file")"
+    current="$(tuned-adm active 2>/dev/null | sed 's/Current active profile: //' || echo "unknown")"
+    log_info "Tuned profile: $current (previous: $prev_profile)"
+    # Only offer the switch when it would actually change the active profile
+    if [[ "$current" != "$prev_profile" ]] && confirm "Restore tuned profile to $prev_profile?"; then
+        sudo tuned-adm profile "$prev_profile"
+        log_success "Tuned profile restored to: $prev_profile"
+    fi
+fi
+
+# ── 3. BIOS reminder ────────────────────────────────────
+echo ""
+log_warn "BIOS VRAM changes cannot be rolled back automatically."
+log_info "To revert: Reboot → F10 → Advanced → UMA Frame Buffer Size → restore previous value"
diff --git a/scripts/optimize/tuned-profile.sh b/scripts/optimize/tuned-profile.sh
new file mode 100644
index 0000000..21a5f3f
--- /dev/null
+++ b/scripts/optimize/tuned-profile.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Switch tuned profile to accelerator-performance
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+
+RECOMMENDED="accelerator-performance"
+
+log_header "Tuned Profile Optimization"
+if ! is_cmd tuned-adm; then
+    log_error "tuned is not installed. Install with: sudo dnf install tuned"
+    exit 1
+fi
+
+current="$(detect_tuned_profile)"
+log_info "Current profile: $current"
+if [[ "$current" == "$RECOMMENDED" ]]; then
+    log_success "Already using $RECOMMENDED"
+    exit 0
+fi
+
+# Check availability. The list is captured first: piped straight into `grep -q`, grep exits at the first match and, under
+# pipefail, a SIGPIPE'd tuned-adm reads as "not available". The pattern is anchored to "- name" so only the exact profile matches.
+available_profiles="$(tuned-adm list 2>/dev/null || true)"
+if ! grep -qE "^- ${RECOMMENDED}([[:space:]]|\$)" <<< "$available_profiles"; then
+    log_error "$RECOMMENDED profile not available"
+    log_info "Available profiles:"
+    grep "^-" <<< "$available_profiles" | sed 's/^/  /' || true
+    exit 1
+fi
+
+echo ""
+log_info "Recommended: $RECOMMENDED"
+log_info "Description: Throughput performance with disabled higher latency STOP states"
+log_info "Benefit: 5-8% improvement in prompt processing (pp) benchmarks"
+log_info "No reboot required."
+echo ""
+
+confirm "Switch to $RECOMMENDED?" || {
+    log_info "Skipped"
+    exit 0
+}
+
+# Remember the outgoing profile so it can be restored later
+printf '%s\n' "$current" > "$(data_dir backups)/tuned-previous-profile.txt"
+
+sudo tuned-adm profile "$RECOMMENDED"
+# Re-read the active profile to confirm the switch took effect
+new_profile="$(detect_tuned_profile)"
+if [[ "$new_profile" != "$RECOMMENDED" ]]; then
+    log_error "Profile switch may have failed. Current: $new_profile"
+else
+    log_success "Profile switched to: $new_profile"
+fi
diff --git a/scripts/optimize/verify.sh b/scripts/optimize/verify.sh
new file mode 100644
index 0000000..e31577e
--- /dev/null
+++ b/scripts/optimize/verify.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+# Post-optimization verification checklist
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+log_header "Optimization Verification"
+
+score=0
+total=0
+
+check() {
+    local pass="$1" label="$2" detail="$3"
+    total=$(( total + 1 ))
+    if [[ "$pass" == "1" ]]; then
+        score=$(( score + 1 ))
+        print_status pass "$label" "$detail"
+    else
+        print_status fail "$label" "$detail"
+    fi
+}
+
+# Kernel version — compare major.minor.patch so the check matches the 6.18.4 minimum in its label
+kernel="$(detect_kernel_version)"
+IFS=.- read -r kernel_major kernel_minor kernel_patch _ <<< "$kernel"
+kernel_patch="${kernel_patch%%[!0-9]*}"  # keep leading digits only; a missing or non-numeric patch level counts as 0
+kernel_ok=0
+(( kernel_major > 6 || (kernel_major == 6 && (kernel_minor > 18 || (kernel_minor == 18 && 10#${kernel_patch:-0} >= 4))) )) && kernel_ok=1
+check "$kernel_ok" "Kernel >= 6.18.4" "$kernel"
+
+# Firmware
+fw_ok=1
+detect_firmware_bad && fw_ok=0
+check "$fw_ok" "Firmware (not 20251125)" "$(detect_firmware_version)"
+
+# Kernel params
+iommu_val="$(detect_kernel_param 'iommu')"
+iommu_ok=0
+[[ "$iommu_val" == "pt" ]] && iommu_ok=1
+check "$iommu_ok" "iommu=pt" "${iommu_val:-not set}"
+
+gttsize="$(detect_gttsize_param)"
+rec_gttsize="$(recommended_gttsize_mib)"
+gtt_ok=0
+[[ -n "$gttsize" ]] && (( gttsize >= rec_gttsize )) && gtt_ok=1
+check "$gtt_ok" "amdgpu.gttsize" "${gttsize:-not set} (recommended: $rec_gttsize)"
+
+pages="$(detect_pages_limit_param)"
+rec_pages="$(recommended_pages_limit)"
+pages_ok=0
+[[ -n "$pages" ]] && (( pages >= rec_pages )) && pages_ok=1
+check "$pages_ok" "ttm.pages_limit" "${pages:-not set} (recommended: $rec_pages)"
+
+# Tuned profile
+tuned="$(detect_tuned_profile)"
+tuned_ok=0
+[[ "$tuned" == "accelerator-performance" ]] && tuned_ok=1
+check "$tuned_ok" "Tuned profile" "$tuned"
+
+# VRAM (should be <= 1 GiB)
+vram="$(detect_vram_total)"
+vram_gib=$(echo "scale=1; $vram / 1073741824" | bc)
+vram_ok=0
+(( vram <= 1073741824 )) && vram_ok=1
+check "$vram_ok" "VRAM <= 1 GiB" "${vram_gib} GiB"
+
+# GTT (should be close to recommended)
+gtt="$(detect_gtt_total)"
+gtt_gib=$(echo "scale=1; $gtt / 1073741824" | bc)
+rec_gtt_bytes=$(( rec_gttsize * 1048576 ))
+gtt_mem_ok=0
+(( gtt >= rec_gtt_bytes * 3 / 4 )) && gtt_mem_ok=1
+check "$gtt_mem_ok" "GTT >= $(human_mib "$rec_gttsize")" "${gtt_gib} GiB"
+
+# GPU monitor installed
+monitor_ok=0
+is_cmd amdgpu_top && monitor_ok=1
+check "$monitor_ok" "amdgpu_top installed" "$(is_cmd amdgpu_top && echo 'yes' || echo 'no — run make monitor-install')"
+
+# Summary — the closing line depends on the share of checks that passed
+echo ""
+print_divider
+printf "\n  ${BOLD}Score: %d / %d${RESET}\n" "$score" "$total"
+if (( score == total )); then
+    verdict="  ${GREEN}Fully optimized!${RESET} Run 'make benchmark' to measure performance.\n"
+elif (( score >= total * 3 / 4 )); then
+    verdict="  ${YELLOW}Nearly there${RESET} — check the failed items above.\n"
+elif (( score >= total / 2 )); then
+    verdict="  ${YELLOW}Partially optimized${RESET} — run 'make optimize' for the remaining items.\n"
+else
+    verdict="  ${RED}Significant optimizations pending${RESET} — run 'make optimize'\n"
+fi
+printf "$verdict"
+echo ""
diff --git a/scripts/optimize/vram-gtt.sh b/scripts/optimize/vram-gtt.sh
new file mode 100644
index 0000000..06bb21d
--- /dev/null
+++ b/scripts/optimize/vram-gtt.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# BIOS VRAM guidance + GTT verification
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/detect.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+log_header "VRAM / GTT Memory Optimization"
+
+vram_total="$(detect_vram_total)"
+gtt_total="$(detect_gtt_total)"
+# bc keeps one decimal place; dividing by 2^30 turns the totals into GiB for display
+vram_gib=$(bc <<< "scale=1; $vram_total / 1073741824")
+gtt_gib=$(bc <<< "scale=1; $gtt_total / 1073741824")
+log_info "Current memory allocation:"
+print_kv "VRAM (dedicated)" "${vram_gib} GiB"
+print_kv "GTT (dynamic)" "${gtt_gib} GiB"
+print_kv "System RAM (visible)" "$(bc <<< "scale=1; $(detect_system_ram_kb) / 1048576") GiB"
+echo ""
+
+# ── Check if BIOS VRAM change is needed ──────────────────
+# Optimal: VRAM <= 1 GiB (0.5 GiB ideal), rest dynamically via GTT
+one_gib=1073741824
+if (( vram_total > one_gib )); then
+    log_warn "VRAM is ${vram_gib} GiB — this permanently locks memory away from the OS."
+    log_info "AMD recommends 512 MB dedicated VRAM for Strix Halo."
+    log_info "The GPU accesses additional memory dynamically via GTT (kernel params)."
+    echo ""
+    printf "${BOLD}BIOS Configuration Steps (HP ZBook Ultra G1a):${RESET}\n"
+    printf '%s\n' "" \
+        "  1. Reboot the laptop" \
+        "  2. Press F10 repeatedly during boot to enter BIOS Setup" \
+        "  3. Navigate to: Advanced > Built-in Device Options" \
+        "     (or Advanced > Display > UMA Frame Buffer Size)" \
+        "  4. Set UMA Frame Buffer Size to: 512 MB (or smallest available)" \
+        "  5. Save and Exit (F10)" \
+        "" \
+        "  NOTE: The exact menu path may vary by BIOS version." \
+        "  Look for 'UMA', 'Frame Buffer', 'VRAM', or 'iGPU Memory'." \
+        ""
+
+    log_info "After BIOS change + reboot with kernel params, expected state:"
+    echo "  VRAM:       ~512 MiB"
+    echo "  GTT:        ~$(human_mib "$(recommended_gttsize_mib)") (with kernel params)"
+    echo "  System RAM: ~$(echo "scale=1; $(detect_total_physical_ram_kb) / 1048576 - 0.5" | bc) GiB visible"
+    echo ""
+
+elif (( vram_total <= one_gib )); then
+    log_success "VRAM is ${vram_gib} GiB — already optimal!"
+fi
+
+# ── Check GTT ────────────────────────────────────────────
+rec_gttsize="$(recommended_gttsize_mib)"
+rec_gtt_bytes=$(( rec_gttsize * 1048576 ))
+rec_gtt_human="$(human_mib "$rec_gttsize")" || true
+if (( gtt_total >= rec_gtt_bytes * 3 / 4 )); then  # good from 3/4 of the recommendation upwards
+    log_success "GTT is ${gtt_gib} GiB — good (recommended: ~$rec_gtt_human)"
+else
+    log_warn "GTT is ${gtt_gib} GiB — low (recommended: ~$rec_gtt_human)"
+    log_info "This requires kernel boot parameters. Run: make optimize-kernel"
+fi
+
+# ── Optional: amd-debug-tools ────────────────────────────
+echo ""
+log_header "Optional: amd-debug-tools (amd-ttm)"
+log_info "AMD provides 'amd-debug-tools' for runtime GTT/TTM inspection."
+
+if is_cmd amd-ttm; then
+    log_success "amd-ttm is installed"
+    log_info "Current GTT settings:"
+    amd-ttm 2>/dev/null || true
+elif ! is_cmd pipx; then
+    log_info "Install pipx first: sudo dnf install pipx"
+    log_info "Then: pipx install amd-debug-tools"
+else
+    log_info "Install with: pipx install amd-debug-tools"
+    if confirm "Install amd-debug-tools via pipx?"; then
+        pipx install amd-debug-tools
+        log_success "Installed. Run 'amd-ttm' to inspect GTT allocation."
+    fi
+fi