#!/usr/bin/env bash
# Quick-glance system audit — single screen status overview
set -euo pipefail

# Locate this script on disk so the shared libraries resolve no matter
# which directory the caller runs it from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"
source "$SCRIPT_DIR/../../lib/format.sh"

# ── Gather data ──────────────────────────────────────────
# Everything is collected up front; the output sections further down only
# format these values. The detect_* / recommended_* helpers are not defined
# in this file (presumably they come from the libraries sourced above).

# CPU / GPU identity
cpu_model=$(detect_cpu_model)
cpu_threads=$(detect_cpu_cores)
cpu_physical=$(detect_cpu_physical)
gpu_name=$(detect_gpu_name)

# Kernel and firmware versions
kernel=$(detect_kernel_version)
firmware=$(detect_firmware_version)

# Memory pools (compared against byte thresholds later on)
vram_total=$(detect_vram_total)
vram_used=$(detect_vram_used)
gtt_total=$(detect_gtt_total)
gtt_used=$(detect_gtt_used)
ram_kb=$(detect_system_ram_kb)
ram_bytes=$(( ram_kb * 1024 ))

# Kernel command-line parameters and tuning profile
param_iommu=$(detect_kernel_param 'iommu')
param_gttsize=$(detect_gttsize_param)
param_pages=$(detect_pages_limit_param)
tuned=$(detect_tuned_profile)

# Software stack versions
rocm_ver=$(detect_rocm_version)
vulkan_drv=$(detect_vulkan_driver)
vulkan_ver=$(detect_vulkan_version)

# Recommended values used in the hints of failing checks
rec_gttsize=$(recommended_gttsize_mib)
rec_pages=$(recommended_pages_limit)

# ── Score tracking ───────────────────────────────────────
score=0
total=0

#######################################
# Record one scored check and print its status line.
# Globals:   total (incremented), score (incremented when the check passes)
# Arguments: $1 - "1" when the check passed, anything else counts as failed
#            $2 - label shown for the check
#            $3 - detail text shown next to the label
#######################################
check() {
  local passed="$1" title="$2" note="$3"
  local verdict=fail

  total=$(( total + 1 ))
  if [[ "$passed" == "1" ]]; then
    verdict=pass
    score=$(( score + 1 ))
  fi
  print_status "$verdict" "$title" "$note"
}

#######################################
# Print a warning line; does not touch score or total.
# Arguments: $1 - label, $2 - detail text
#######################################
check_warn() {
  print_status warn "$1" "$2"
}

#######################################
# Print an informational line; does not touch score or total.
# Arguments: $1 - label, $2 - detail text
#######################################
check_info() {
  print_status info "$1" "$2"
}

# ── Output ───────────────────────────────────────────────
printf "\n${BOLD}${CYAN}"
cat << 'BANNER'
╔═══════════════════════════════════════════╗
║      AMD Strix Halo — System Status       ║
╚═══════════════════════════════════════════╝
BANNER
printf "${RESET}"

# Hardware
log_header "Hardware"
print_kv "CPU" "$cpu_model (${cpu_physical}C/${cpu_threads}T)"
print_kv "GPU" "$gpu_name"
print_kv "System RAM (visible)" "$(human_bytes "$ram_bytes")"

# Kernel & Firmware
log_header "Kernel & Firmware"

#######################################
# Test whether a kernel release string is at least MAJOR.MINOR.PATCH.
# Only the leading numeric "X.Y[.Z]" part of the release is compared; a
# missing patch component counts as 0 and any suffix ("-200.fc43", "-rc1")
# is ignored.
# Arguments: $1 - release string, e.g. "6.18.4-200.fc43.x86_64"
#            $2 - minimum major, $3 - minimum minor, $4 - minimum patch
# Returns:   0 when release >= minimum, 1 when older or unparsable
#######################################
kernel_at_least() {
  local release="$1" min_major="$2" min_minor="$3" min_patch="$4"

  [[ "$release" =~ ^([0-9]+)\.([0-9]+)(\.([0-9]+))? ]] || return 1

  # 10# forces base 10 so a component such as "08" is not read as octal.
  local major=$(( 10#${BASH_REMATCH[1]} ))
  local minor=$(( 10#${BASH_REMATCH[2]} ))
  local patch=$(( 10#${BASH_REMATCH[4]:-0} ))

  if (( major > min_major \
        || (major == min_major && minor > min_minor) \
        || (major == min_major && minor == min_minor && patch >= min_patch) )); then
    return 0
  fi
  return 1
}

# The patch level is part of the requirement: 6.18.0-6.18.3 must not pass.
kernel_ok=0
if kernel_at_least "$kernel" 6 18 4; then
  kernel_ok=1
fi
check "$kernel_ok" "Kernel version" "$kernel (need >= 6.18.4)"

firmware_ok=1
firmware_note="$firmware"
if detect_firmware_bad; then
  firmware_ok=0
  firmware_note="$firmware (KNOWN BAD — causes ROCm crashes!)"
fi
check "$firmware_ok" "Firmware" "$firmware_note"

# Memory allocation
log_header "Memory Allocation"

#######################################
# Print VALUE / DIVISOR truncated to one decimal place, always with a
# leading digit (536870912 / 1073741824 -> "0.5"). Uses shell integer
# arithmetic only, so no external calculator is required.
# Arguments: $1 - dividend (integer; a fractional part is dropped)
#            $2 - divisor (positive integer)
# Outputs:   the quotient, or "n/a" when either argument is not numeric
# Returns:   always 0, so it is safe in assignments under `set -e`
#######################################
div_1dp() {
  local value="$1" divisor="$2"

  if [[ ! "$value" =~ ^-?[0-9]+(\.[0-9]+)?$ ]] \
    || [[ ! "$divisor" =~ ^[0-9]+$ ]] \
    || (( 10#$divisor == 0 )); then
    printf 'n/a'
    return 0
  fi

  local sign=""
  local magnitude="${value%%.*}"
  if [[ "$magnitude" == -* ]]; then
    sign="-"
    magnitude="${magnitude#-}"
  fi

  # Work in tenths so the single decimal digit falls out of integer math.
  local tenths=$(( 10#$magnitude * 10 / 10#$divisor ))
  if (( tenths == 0 )); then
    sign=""   # never print "-0.0"
  fi
  printf '%s%d.%d' "$sign" $(( tenths / 10 )) $(( tenths % 10 ))
  return 0
}

vram_gib="$(div_1dp "$vram_total" 1073741824)"
gtt_gib="$(div_1dp "$gtt_total" 1073741824)"

# VRAM: should be <= 1 GiB (ideally 0.5 GiB)
vram_ok=0
vram_note="${vram_gib} GiB"
if (( vram_total <= 1073741824 )); then
  vram_ok=1
else
  vram_note+=" — should be 0.5 GiB in BIOS"
fi
check "$vram_ok" "VRAM (dedicated)" "$vram_note"

# GTT: should be close to recommended (at least 75%)
gtt_rec_bytes=$(( rec_gttsize * 1048576 ))
gtt_ok=0
gtt_note="${gtt_gib} GiB"
if (( gtt_total >= gtt_rec_bytes * 3 / 4 )); then
  gtt_ok=1
else
  gtt_note+=" — should be ~$(human_mib "$rec_gttsize") with kernel params"
fi
check "$gtt_ok" "GTT (dynamic)" "$gtt_note"

print_kv "VRAM in use" "$(human_bytes "$vram_used")"
print_kv "GTT in use" "$(human_bytes "$gtt_used")"

# Kernel boot parameters
log_header "Kernel Boot Parameters"

# iommu must be exactly "pt"; any other value fails but is still displayed.
iommu_ok=0
if [[ "$param_iommu" == "pt" ]]; then
  iommu_ok=1
fi
if [[ -n "$param_iommu" ]]; then
  iommu_note="current: $param_iommu"
else
  iommu_note="MISSING"
fi
check "$iommu_ok" "iommu=pt" "$iommu_note"

# gttsize / pages_limit only need to be present; their values are not judged.
if [[ -n "$param_gttsize" ]]; then
  gtt_param_ok=1
  gtt_param_note="current: ${param_gttsize} MiB"
else
  gtt_param_ok=0
  gtt_param_note="MISSING — recommended: ${rec_gttsize}"
fi
check "$gtt_param_ok" "amdgpu.gttsize" "$gtt_param_note"

if [[ -n "$param_pages" ]]; then
  pages_ok=1
  pages_note="current: $param_pages"
else
  pages_ok=0
  pages_note="MISSING — recommended: ${rec_pages}"
fi
check "$pages_ok" "ttm.pages_limit" "$pages_note"

# Tuned profile
log_header "Performance Profile"
tuned_ok=0
tuned_note="$tuned"
if [[ "$tuned" == "accelerator-performance" ]]; then
  tuned_ok=1
else
  tuned_note+=" — recommended: accelerator-performance"
fi
check "$tuned_ok" "Tuned profile" "$tuned_note"

# Software stack
log_header "Software Stack"
check_info "ROCm" "$rocm_ver"
check_info "Vulkan" "$vulkan_drv $vulkan_ver"

# Toolboxes
# Query the container list once and reuse it for both the count and the
# listing; blank lines are not counted as containers.
toolbox_names=()
if is_cmd toolbox; then
  toolbox_list="$(detect_toolbox_names)"
  while read -r toolbox_name; do
    if [[ -n "$toolbox_name" ]]; then
      toolbox_names+=("$toolbox_name")
    fi
  done <<< "$toolbox_list"
fi
toolbox_count=${#toolbox_names[@]}

if (( toolbox_count > 0 )); then
  check_info "Toolbox containers" "$toolbox_count available"
  for toolbox_name in "${toolbox_names[@]}"; do
    printf " ${DIM}%s${RESET}\n" "$toolbox_name"
  done
else
  check_warn "Toolbox containers" "none — run 'make benchmark-setup'"
fi

# LLM stacks
log_header "LLM Stacks"
check_info "LM Studio" "$(detect_stack_lmstudio)"
check_info "opencode" "$(detect_stack_opencode)"
check_info "ollama" "$(detect_stack_ollama)"
check_info "llama.cpp (native)" "$(detect_stack_llamacpp)"

# Sensors
log_header "Current Sensors"
gpu_temp="$(detect_gpu_temp)"
gpu_power="$(detect_gpu_power)"
gpu_busy="$(detect_gpu_busy)"
# The raw readings are scaled down by 1000 and 1000000 for display.
print_kv "GPU Temperature" "$(div_1dp "$gpu_temp" 1000) C"
print_kv "GPU Power" "$(div_1dp "$gpu_power" 1000000) W"
print_kv "GPU Utilization" "${gpu_busy}%"

# Overall score
log_header "Optimization Score"
printf "\n ${BOLD}%d / %d${RESET} checks passing\n" "$score" "$total"
if (( score == total )); then
  printf " ${GREEN}System is fully optimized!${RESET}\n"
elif (( score >= total / 2 )); then
  printf " ${YELLOW}Partially optimized — run 'make optimize' for improvements${RESET}\n"
else
  printf " ${RED}Significant optimizations available — run 'make optimize'${RESET}\n"
fi
echo ""