Files
Felipe Cardoso c596e38e9e Initial commit
2026-03-25 20:13:15 +01:00

181 lines
6.1 KiB
Bash

#!/usr/bin/env bash
# Quick-glance system audit — single screen status overview
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"
source "$SCRIPT_DIR/../../lib/format.sh"
# ── Gather data ──────────────────────────────────────────
# Every value the report needs is collected once, up front. The detect_* and
# recommended_* helpers are not defined in this file (presumably they come
# from the sourced lib/*.sh files — confirm). Because the script runs under
# `set -e`, a helper that returns non-zero in one of these plain assignments
# aborts the whole audit.
#
# CPU / GPU identity and kernel / firmware versions.
# cpu_threads and cpu_physical are shown as "<physical>C/<threads>T".
cpu_model="$(detect_cpu_model)"
cpu_threads="$(detect_cpu_cores)"
cpu_physical="$(detect_cpu_physical)"
gpu_name="$(detect_gpu_name)"
kernel="$(detect_kernel_version)"
firmware="$(detect_firmware_version)"
# GPU memory pools. Later code treats all four as byte counts (the totals are
# divided by 1073741824 for GiB display, the used values go to human_bytes).
vram_total="$(detect_vram_total)"
vram_used="$(detect_vram_used)"
gtt_total="$(detect_gtt_total)"
gtt_used="$(detect_gtt_used)"
# System RAM is reported in KiB and converted to bytes for human_bytes.
ram_kb="$(detect_system_ram_kb)"
ram_bytes=$(( ram_kb * 1024 ))
# Kernel command-line parameters; later checks treat an empty value as
# "parameter missing". param_iommu is compared against "pt".
param_iommu="$(detect_kernel_param 'iommu')"
param_gttsize="$(detect_gttsize_param)"
param_pages="$(detect_pages_limit_param)"
# Active tuned profile plus ROCm / Vulkan details (display strings).
tuned="$(detect_tuned_profile)"
rocm_ver="$(detect_rocm_version)"
vulkan_drv="$(detect_vulkan_driver)"
vulkan_ver="$(detect_vulkan_version)"
# Recommended values used as comparison targets and in hints. rec_gttsize is
# treated as MiB later (multiplied by 1048576 to get bytes).
rec_gttsize="$(recommended_gttsize_mib)"
rec_pages="$(recommended_pages_limit)"
# ── Score tracking ───────────────────────────────────────
# Running tally behind the final "Optimization Score" line: `total` counts
# every scored check, `score` counts the ones that passed.
total=0
score=0

#######################################
# Record one scored check and print its result line.
# Globals:   total (always incremented), score (incremented on a pass)
# Arguments: $1 - "1" when the check passed; any other value counts as a fail
#            $2 - label forwarded to print_status
#            $3 - detail text forwarded to print_status
# Outputs:   whatever print_status emits for the pass/fail line
# Returns:   the exit status of print_status
#######################################
check() {
  local passed="$1" title="$2" info="$3"
  local outcome="fail"

  total=$(( total + 1 ))
  if [[ "$passed" == "1" ]]; then
    outcome="pass"
    score=$(( score + 1 ))
  fi
  print_status "$outcome" "$title" "$info"
}
#######################################
# Print a warning line that does not count towards the score.
# Arguments: $1 - label, $2 - detail text (both forwarded to print_status)
# Outputs:   whatever print_status emits for a "warn" line
#######################################
check_warn() {
  print_status warn "$1" "$2"
}
#######################################
# Print an informational line that does not count towards the score.
# Arguments: $1 - label, $2 - detail text (both forwarded to print_status)
# Outputs:   whatever print_status emits for an "info" line
#######################################
check_info() {
  print_status info "$1" "$2"
}
# ── Output ───────────────────────────────────────────────
# Title banner. The colour variables (BOLD, CYAN, RESET) are not defined in
# this file; they stay inside the printf format string exactly as before.
printf "\n${BOLD}${CYAN}"
printf '%s\n' \
  '╔═══════════════════════════════════════════╗' \
  '║ AMD Strix Halo — System Status ║' \
  '╚═══════════════════════════════════════════╝'
printf "${RESET}"

# Hardware: plain key/value lines, nothing here is scored.
log_header "Hardware"
print_kv "CPU" "${cpu_model} (${cpu_physical}C/${cpu_threads}T)"
print_kv "GPU" "${gpu_name}"
print_kv "System RAM (visible)" "$(human_bytes "${ram_bytes}")"
# Kernel & Firmware
log_header "Kernel & Firmware"

#######################################
# Succeed when a kernel release string is at least MAJOR.MINOR.PATCH.
# Only the leading numeric "X.Y" or "X.Y.Z" part of the release is compared;
# anything after it (e.g. "-200.fc43.x86_64", "-rc1") is ignored. A missing
# patch component counts as 0; an unparsable release never satisfies the
# requirement.
# Arguments: $1 - release string (e.g. "6.18.4-200.fc43.x86_64")
#            $2 - required major, $3 - required minor, $4 - required patch
# Returns:   0 if release >= required version, 1 otherwise
#######################################
kernel_version_ge() {
  local release="$1" need_major="$2" need_minor="$3" need_patch="$4"
  local version_re='^([0-9]+)\.([0-9]+)(\.([0-9]+))?'
  local major minor patch

  [[ "$release" =~ $version_re ]] || return 1
  # 10# forces base 10 so a component such as "08" is not parsed as octal.
  major=$(( 10#${BASH_REMATCH[1]} ))
  minor=$(( 10#${BASH_REMATCH[2]} ))
  patch=$(( 10#${BASH_REMATCH[4]:-0} ))

  (( major > need_major )) && return 0
  (( major < need_major )) && return 1
  (( minor > need_minor )) && return 0
  (( minor < need_minor )) && return 1
  (( patch >= need_patch ))
}

# The requirement shown to the user is 6.18.4, so the patch level has to take
# part in the comparison (6.18.0 - 6.18.3 must not pass).
kernel_ok=0
if kernel_version_ge "$kernel" 6 18 4; then
  kernel_ok=1
fi
check "$kernel_ok" "Kernel version" "$kernel (need >= 6.18.4)"

# Firmware passes unless detect_firmware_bad reports success, in which case
# the detail line carries an explicit warning.
firmware_ok=1
firmware_note="$firmware"
if detect_firmware_bad; then
  firmware_ok=0
  firmware_note="$firmware (KNOWN BAD — causes ROCm crashes!)"
fi
check "$firmware_ok" "Firmware" "$firmware_note"
# Memory allocation
log_header "Memory Allocation"

#######################################
# Print a byte count as GiB with exactly one decimal digit.
# The value is truncated (not rounded), matching `bc` with scale=1, but it
# always has a leading integer digit: 512 MiB prints as "0.5", not ".5".
# Pure shell arithmetic, so the report does not depend on `bc` being
# installed. Empty or non-numeric input is treated as 0.
# Arguments: $1 - size in bytes
# Outputs:   "<int>.<tenth>" on stdout, without a trailing newline
#######################################
bytes_to_gib_1dp() {
  local bytes="${1:-0}" tenths
  [[ "$bytes" =~ ^[0-9]+$ ]] || bytes=0
  # 10# forces base 10 so leading zeros are not parsed as octal.
  tenths=$(( 10#$bytes * 10 / 1073741824 ))
  printf '%d.%d' "$(( tenths / 10 ))" "$(( tenths % 10 ))"
}

vram_gib="$(bytes_to_gib_1dp "$vram_total")"
gtt_gib="$(bytes_to_gib_1dp "$gtt_total")"

# VRAM: should be <= 1 GiB (ideally 0.5 GiB)
vram_ok=0
vram_detail="${vram_gib} GiB"
if (( vram_total <= 1073741824 )); then
  vram_ok=1
else
  vram_detail+=" — should be 0.5 GiB in BIOS"
fi
check "$vram_ok" "VRAM (dedicated)" "$vram_detail"

# GTT: should be close to recommended (at least 75%)
gtt_rec_bytes=$(( rec_gttsize * 1048576 ))
if (( gtt_total >= gtt_rec_bytes * 3 / 4 )); then
  gtt_ok=1
  check "$gtt_ok" "GTT (dynamic)" "${gtt_gib} GiB"
else
  gtt_ok=0
  check "$gtt_ok" "GTT (dynamic)" "${gtt_gib} GiB — should be ~$(human_mib "$rec_gttsize") with kernel params"
fi

print_kv "VRAM in use" "$(human_bytes "$vram_used")"
print_kv "GTT in use" "$(human_bytes "$gtt_used")"
# Kernel boot parameters
log_header "Kernel Boot Parameters"

# iommu: passes only when the value is exactly "pt"; any other non-empty
# value is still shown as the current setting.
if [[ "$param_iommu" == "pt" ]]; then
  iommu_ok=1
else
  iommu_ok=0
fi
if [[ -z "$param_iommu" ]]; then
  iommu_detail="MISSING"
else
  iommu_detail="current: $param_iommu"
fi
check "$iommu_ok" "iommu=pt" "$iommu_detail"

# amdgpu.gttsize: passes when the parameter is present at all.
if [[ -z "$param_gttsize" ]]; then
  gtt_param_ok=0
  gtt_param_detail="MISSING — recommended: ${rec_gttsize}"
else
  gtt_param_ok=1
  gtt_param_detail="current: ${param_gttsize} MiB"
fi
check "$gtt_param_ok" "amdgpu.gttsize" "$gtt_param_detail"

# ttm.pages_limit: passes when the parameter is present at all.
if [[ -z "$param_pages" ]]; then
  pages_ok=0
  pages_detail="MISSING — recommended: ${rec_pages}"
else
  pages_ok=1
  pages_detail="current: $param_pages"
fi
check "$pages_ok" "ttm.pages_limit" "$pages_detail"
# Tuned profile: passes only for "accelerator-performance"; any other value
# is shown together with the recommendation.
log_header "Performance Profile"
tuned_detail="$tuned"
if [[ "$tuned" == "accelerator-performance" ]]; then
  tuned_ok=1
else
  tuned_ok=0
  tuned_detail+=" — recommended: accelerator-performance"
fi
check "$tuned_ok" "Tuned profile" "$tuned_detail"
# Software stack (informational only, not scored)
log_header "Software Stack"
check_info "ROCm" "$rocm_ver"
check_info "Vulkan" "$vulkan_drv $vulkan_ver"

# Toolboxes
# detect_toolbox_names is run once and its output kept, so the count and the
# printed list always come from the same snapshot and the lookup is not paid
# for twice. As a plain assignment it still aborts the script under `set -e`
# if the helper fails.
toolbox_names=()
if is_cmd toolbox; then
  toolbox_list="$(detect_toolbox_names)"
  if [[ -n "$toolbox_list" ]]; then
    # One array element per output line.
    mapfile -t toolbox_names <<< "$toolbox_list"
  fi
fi
toolbox_count=${#toolbox_names[@]}

if (( toolbox_count > 0 )); then
  check_info "Toolbox containers" "$toolbox_count available"
  # printf re-applies the format once per array element.
  printf " ${DIM}%s${RESET}\n" "${toolbox_names[@]}"
else
  check_warn "Toolbox containers" "none — run 'make benchmark-setup'"
fi
# LLM stacks: one informational line per stack. Labels and probe helpers are
# paired by index; each probe's stdout becomes the detail text.
log_header "LLM Stacks"
llm_stack_labels=(
  "LM Studio"
  "opencode"
  "ollama"
  "llama.cpp (native)"
)
llm_stack_probes=(
  detect_stack_lmstudio
  detect_stack_opencode
  detect_stack_ollama
  detect_stack_llamacpp
)
for stack_idx in "${!llm_stack_labels[@]}"; do
  check_info "${llm_stack_labels[stack_idx]}" "$("${llm_stack_probes[stack_idx]}")"
done
# Sensors
log_header "Current Sensors"

#######################################
# Format a raw integer sensor reading as "<value> <unit>" with one decimal.
# The reading is divided by DIVISOR and truncated to one decimal digit (like
# `bc` with scale=1), but always with a leading integer digit ("0.5", never
# ".5") and without needing `bc`. A reading that is empty or not an integer
# prints "n/a" instead of a bare unit.
# Arguments: $1 - raw integer reading (may be empty)
#            $2 - positive integer divisor
#            $3 - unit suffix
# Outputs:   formatted reading on stdout, without a trailing newline
#######################################
format_scaled_reading() {
  local raw="${1:-}" divisor="$2" unit="$3"
  local sign="" tenths

  if [[ ! "$raw" =~ ^-?[0-9]+$ ]]; then
    printf 'n/a'
    return 0
  fi
  # Work on the magnitude so truncation and the modulo behave for negatives.
  if [[ "$raw" == -* ]]; then
    sign="-"
    raw="${raw#-}"
  fi
  # 10# forces base 10 so leading zeros are not parsed as octal.
  tenths=$(( 10#$raw * 10 / divisor ))
  if (( tenths == 0 )); then
    sign=""
  fi
  printf '%s%d.%d %s' "$sign" "$(( tenths / 10 ))" "$(( tenths % 10 ))" "$unit"
}

gpu_temp="$(detect_gpu_temp)"
gpu_power="$(detect_gpu_power)"
gpu_busy="$(detect_gpu_busy)"
# Temperature is divided by 1000 and power by 1000000 before display
# (presumably millidegrees C and microwatts — confirm against the helpers).
print_kv "GPU Temperature" "$(format_scaled_reading "$gpu_temp" 1000 C)"
print_kv "GPU Power" "$(format_scaled_reading "$gpu_power" 1000000 W)"
print_kv "GPU Utilization" "${gpu_busy}%"
# Overall score: "<passed> / <total>" followed by a one-line verdict.
# All checks passing is green, fewer than half (integer division) is red,
# and anything in between is yellow.
log_header "Optimization Score"
printf "\n ${BOLD}%d / %d${RESET} checks passing\n" "$score" "$total"
if (( score == total )); then
  verdict_color="${GREEN}"
  verdict_text="System is fully optimized!"
elif (( score < total / 2 )); then
  verdict_color="${RED}"
  verdict_text="Significant optimizations available — run 'make optimize'"
else
  verdict_color="${YELLOW}"
  verdict_text="Partially optimized — run 'make optimize' for improvements"
fi
printf " ${verdict_color}%s${RESET}\n" "$verdict_text"
printf '\n'