Initial commit
This commit is contained in:
180
scripts/audit/quick-glance.sh
Normal file
180
scripts/audit/quick-glance.sh
Normal file
@@ -0,0 +1,180 @@
|
||||
#!/usr/bin/env bash
|
||||
# Quick-glance system audit — single screen status overview
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "$SCRIPT_DIR/../../lib/common.sh"
|
||||
source "$SCRIPT_DIR/../../lib/detect.sh"
|
||||
source "$SCRIPT_DIR/../../lib/format.sh"
|
||||
|
||||
# ── Gather data ──────────────────────────────────────────
# Every value comes from a helper in the sourced lib/ files. The helpers are
# invoked in a fixed order and their stdout is captured verbatim.

# CPU / GPU / kernel identification strings.
cpu_model=$(detect_cpu_model)
cpu_threads=$(detect_cpu_cores)
cpu_physical=$(detect_cpu_physical)
gpu_name=$(detect_gpu_name)
kernel=$(detect_kernel_version)
firmware=$(detect_firmware_version)

# GPU memory pools; compared against byte thresholds further down.
vram_total=$(detect_vram_total)
vram_used=$(detect_vram_used)
gtt_total=$(detect_gtt_total)
gtt_used=$(detect_gtt_used)

# System RAM is captured in KB and converted to bytes for display.
ram_kb=$(detect_system_ram_kb)
ram_bytes=$(( 1024 * ram_kb ))

# Kernel boot parameters (empty is reported as MISSING later on).
param_iommu=$(detect_kernel_param 'iommu')
param_gttsize=$(detect_gttsize_param)
param_pages=$(detect_pages_limit_param)

# Active tuned profile.
tuned=$(detect_tuned_profile)

# Compute / graphics software stack.
rocm_ver=$(detect_rocm_version)
vulkan_drv=$(detect_vulkan_driver)
vulkan_ver=$(detect_vulkan_version)

# Recommended values shown next to the current settings.
rec_gttsize=$(recommended_gttsize_mib)
rec_pages=$(recommended_pages_limit)
|
||||
|
||||
# ── Score tracking ───────────────────────────────────────
# score = number of passing checks, total = number of scored checks.
score=0
total=0

#######################################
# Record one scored check and print its status line.
# Globals:   total (incremented on every call)
#            score (incremented only when the check passed)
# Arguments: $1 - "1" if the check passed; any other value counts as a failure
#            $2 - label handed to print_status
#            $3 - detail text handed to print_status (optional, default empty)
# Outputs:   whatever print_status writes
#######################################
check() {
  # Defaults keep a call with a missing argument from aborting the whole
  # report under `set -u`.
  local pass="${1:-0}" label="${2:-}" detail="${3:-}"
  local status="fail"

  total=$(( total + 1 ))
  if [[ "$pass" == "1" ]]; then
    score=$(( score + 1 ))
    status="pass"
  fi
  print_status "$status" "$label" "$detail"
}
|
||||
|
||||
#######################################
# Print an unscored warning line (score and total are not touched).
# Arguments: $1 - label
#            $2 - detail text (optional, default empty)
# Outputs:   whatever print_status writes
#######################################
check_warn() {
  # Defaults avoid an "unbound variable" abort under `set -u`.
  local label="${1:-}" detail="${2:-}"
  print_status warn "$label" "$detail"
}
|
||||
|
||||
#######################################
# Print an unscored informational line (score and total are not touched).
# Arguments: $1 - label
#            $2 - detail text (optional, default empty)
# Outputs:   whatever print_status writes
#######################################
check_info() {
  # Defaults avoid an "unbound variable" abort under `set -u`.
  local label="${1:-}" detail="${2:-}"
  print_status info "$label" "$detail"
}
|
||||
|
||||
# ── Output ───────────────────────────────────────────────
# Banner: switch on the BOLD+CYAN attributes, draw the box, then reset.
printf "\n${BOLD}${CYAN}"
printf '%s\n' \
  '╔═══════════════════════════════════════════╗' \
  '║ AMD Strix Halo — System Status ║' \
  '╚═══════════════════════════════════════════╝'
printf "${RESET}"

# Hardware
log_header "Hardware"
cpu_summary="$cpu_model (${cpu_physical}C/${cpu_threads}T)"
print_kv "CPU" "$cpu_summary"
print_kv "GPU" "$gpu_name"
print_kv "System RAM (visible)" "$(human_bytes "$ram_bytes")"
|
||||
|
||||
# Kernel & Firmware

#######################################
# Test whether a kernel release string is at least a given version.
# Only the leading "major.minor[.patch]" digits are compared; a missing
# patch component counts as 0 and any suffix (e.g. "-200.fc43") is ignored.
# Arguments: $1 - release string, e.g. "6.18.4-200.fc43.x86_64"
#            $2 - minimum major
#            $3 - minimum minor
#            $4 - minimum patch
# Returns:   0 if $1 >= the minimum, 1 if it is older or cannot be parsed
#######################################
_kernel_at_least() {
  local release="${1:-}" min_major="$2" min_minor="$3" min_patch="$4"
  local version_re='^([0-9]+)\.([0-9]+)(\.([0-9]+))?'
  local major minor patch

  [[ "$release" =~ $version_re ]] || return 1

  # 10# forces base 10 so a component such as "08" is not parsed as octal.
  major=$(( 10#${BASH_REMATCH[1]} ))
  minor=$(( 10#${BASH_REMATCH[2]} ))
  patch=$(( 10#${BASH_REMATCH[4]:-0} ))

  if (( major != min_major )); then
    if (( major > min_major )); then return 0; else return 1; fi
  fi
  if (( minor != min_minor )); then
    if (( minor > min_minor )); then return 0; else return 1; fi
  fi
  if (( patch >= min_patch )); then return 0; else return 1; fi
}

log_header "Kernel & Firmware"

# The comparison includes the patch level so that it matches the
# "need >= 6.18.4" requirement printed in the detail text.
kernel_ok=0
if _kernel_at_least "$kernel" 6 18 4; then
  kernel_ok=1
fi
check "$kernel_ok" "Kernel version" "$kernel (need >= 6.18.4)"

# Firmware passes unless detect_firmware_bad reports success (exit status 0).
firmware_ok=1
firmware_note="$firmware"
if detect_firmware_bad; then
  firmware_ok=0
  firmware_note="$firmware (KNOWN BAD — causes ROCm crashes!)"
fi
check "$firmware_ok" "Firmware" "$firmware_note"
|
||||
|
||||
# Memory allocation

#######################################
# Convert a byte count to GiB with one decimal place.
# printf always emits the leading zero ("0.5"), which `bc` with scale=1
# does not (".5"). LC_ALL=C pins the decimal separator to ".".
# Arguments: $1 - size in bytes (empty is treated as 0)
# Outputs:   the value in GiB, e.g. "0.5" or "128.0", without a newline
#######################################
_bytes_to_gib() {
  LC_ALL=C awk -v bytes="${1:-0}" 'BEGIN { printf "%.1f", bytes / 1073741824 }'
}

log_header "Memory Allocation"
vram_gib=$(_bytes_to_gib "$vram_total")
gtt_gib=$(_bytes_to_gib "$gtt_total")

# VRAM: should be <= 1 GiB (ideally 0.5 GiB)
vram_ok=0
(( vram_total <= 1073741824 )) && vram_ok=1
check "$vram_ok" "VRAM (dedicated)" "${vram_gib} GiB$([ "$vram_ok" -eq 0 ] && echo " — should be 0.5 GiB in BIOS")"

# GTT: should be close to recommended (at least 75%)
gtt_rec_bytes=$(( rec_gttsize * 1048576 ))
gtt_ok=0
(( gtt_total >= gtt_rec_bytes * 3 / 4 )) && gtt_ok=1
check "$gtt_ok" "GTT (dynamic)" "${gtt_gib} GiB$([ "$gtt_ok" -eq 0 ] && echo " — should be ~$(human_mib "$rec_gttsize") with kernel params")"

print_kv "VRAM in use" "$(human_bytes "$vram_used")"
print_kv "GTT in use" "$(human_bytes "$gtt_used")"
|
||||
|
||||
# Kernel boot parameters
log_header "Kernel Boot Parameters"

# iommu passes only when its value is exactly "pt".
if [[ "$param_iommu" == "pt" ]]; then
  iommu_ok=1
else
  iommu_ok=0
fi
if [[ -n "$param_iommu" ]]; then
  iommu_detail="current: $param_iommu"
else
  iommu_detail="MISSING"
fi
check "$iommu_ok" "iommu=pt" "$iommu_detail"

# amdgpu.gttsize passes when any value is present.
if [[ -n "$param_gttsize" ]]; then
  gtt_param_ok=1
  gtt_param_detail="current: ${param_gttsize} MiB"
else
  gtt_param_ok=0
  gtt_param_detail="MISSING — recommended: ${rec_gttsize}"
fi
check "$gtt_param_ok" "amdgpu.gttsize" "$gtt_param_detail"

# ttm.pages_limit passes when any value is present.
if [[ -n "$param_pages" ]]; then
  pages_ok=1
  pages_detail="current: $param_pages"
else
  pages_ok=0
  pages_detail="MISSING — recommended: ${rec_pages}"
fi
check "$pages_ok" "ttm.pages_limit" "$pages_detail"
|
||||
|
||||
# Tuned profile
log_header "Performance Profile"

# Passes only for the exact profile name; otherwise the recommendation is
# appended to the detail text.
tuned_ok=0
tuned_detail="$tuned"
if [[ "$tuned" == "accelerator-performance" ]]; then
  tuned_ok=1
else
  tuned_detail+=" — recommended: accelerator-performance"
fi
check "$tuned_ok" "Tuned profile" "$tuned_detail"
|
||||
|
||||
# Software stack
log_header "Software Stack"
vulkan_summary="$vulkan_drv $vulkan_ver"
check_info "ROCm" "$rocm_ver"
check_info "Vulkan" "$vulkan_summary"

# Toolboxes
# The count stays 0 when the toolbox command is unavailable; otherwise it is
# the number of lines printed by detect_toolbox_names.
toolbox_count=0
if is_cmd toolbox; then
  toolbox_count=$(detect_toolbox_names | wc -l)
fi

if (( toolbox_count <= 0 )); then
  check_warn "Toolbox containers" "none — run 'make benchmark-setup'"
else
  check_info "Toolbox containers" "$toolbox_count available"
  # One dimmed line per name.
  detect_toolbox_names | while read -r tb_name; do
    printf " ${DIM}%s${RESET}\n" "$tb_name"
  done
fi
|
||||
|
||||
# LLM stacks
log_header "LLM Stacks"

# Each entry is "<label>:<detector function>"; the detector's stdout becomes
# the detail text of an informational line.
for stack_entry in \
  "LM Studio:detect_stack_lmstudio" \
  "opencode:detect_stack_opencode" \
  "ollama:detect_stack_ollama" \
  "llama.cpp (native):detect_stack_llamacpp"; do
  check_info "${stack_entry%%:*}" "$("${stack_entry#*:}")"
done
|
||||
|
||||
# Sensors

#######################################
# Divide a raw sensor reading and format it with one decimal place.
# printf keeps the leading zero ("0.5"), which `bc` with scale=1 drops
# (".5"). LC_ALL=C pins the decimal separator to ".".
# Arguments: $1 - raw numeric reading
#            $2 - divisor
# Outputs:   the scaled value without a newline, or "n/a" when $1 is not a
#            plain number (e.g. empty because the sensor was unreadable)
#######################################
_scale_1dp() {
  local raw="${1:-}" divisor="$2"
  local number_re='^-?[0-9]+(\.[0-9]+)?$'
  if [[ ! "$raw" =~ $number_re ]]; then
    printf 'n/a'
    return 0
  fi
  LC_ALL=C awk -v raw="$raw" -v div="$divisor" 'BEGIN { printf "%.1f", raw / div }'
}

log_header "Current Sensors"
gpu_temp="$(detect_gpu_temp)"
gpu_power="$(detect_gpu_power)"
gpu_busy="$(detect_gpu_busy)"
# Temperature is divided by 1000 and power by 1000000 before display.
print_kv "GPU Temperature" "$(_scale_1dp "$gpu_temp" 1000) C"
print_kv "GPU Power" "$(_scale_1dp "$gpu_power" 1000000) W"
print_kv "GPU Utilization" "${gpu_busy}%"
|
||||
|
||||
# Overall score
log_header "Optimization Score"
printf "\n ${BOLD}%d / %d${RESET} checks passing\n" "$score" "$total"

# Choose the verdict line: every check passing, at least half, or fewer.
if (( score == total )); then
  verdict_fmt=" ${GREEN}System is fully optimized!${RESET}\n"
elif (( score >= total / 2 )); then
  verdict_fmt=" ${YELLOW}Partially optimized — run 'make optimize' for improvements${RESET}\n"
else
  verdict_fmt=" ${RED}Significant optimizations available — run 'make optimize'${RESET}\n"
fi
# The string is used as the printf format, exactly as the literals were.
printf "$verdict_fmt"
printf '\n'
|
||||
194
scripts/audit/system-report.sh
Normal file
194
scripts/audit/system-report.sh
Normal file
@@ -0,0 +1,194 @@
|
||||
#!/usr/bin/env bash
|
||||
# Full system report — detailed audit with JSON + text output
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
source "$SCRIPT_DIR/../../lib/common.sh"
|
||||
source "$SCRIPT_DIR/../../lib/detect.sh"
|
||||
source "$SCRIPT_DIR/../../lib/format.sh"
|
||||
|
||||
# A first argument of exactly "--json" selects JSON-only output.
JSON_ONLY=false
if [[ "${1:-}" == "--json" ]]; then
  JSON_ONLY=true
fi

# ── Gather all data ──────────────────────────────────────
# Every value comes from a helper in the sourced lib/ files; stdout is
# captured verbatim and the call order is fixed.
ts=$(timestamp)

# CPU / GPU / kernel identification strings.
cpu_model=$(detect_cpu_model)
cpu_threads=$(detect_cpu_cores)
cpu_physical=$(detect_cpu_physical)
gpu_name=$(detect_gpu_name)
gpu_device_id=$(detect_gpu_device_id)
kernel=$(detect_kernel_version)
firmware=$(detect_firmware_version)

# GPU memory pools and system RAM.
vram_total=$(detect_vram_total)
vram_used=$(detect_vram_used)
gtt_total=$(detect_gtt_total)
gtt_used=$(detect_gtt_used)
ram_kb=$(detect_system_ram_kb)

# Kernel boot parameters plus the raw command line.
param_iommu=$(detect_kernel_param 'iommu')
param_gttsize=$(detect_gttsize_param)
param_pages=$(detect_pages_limit_param)
cmdline=$(< /proc/cmdline)

# Tuned profile and software stack versions.
tuned=$(detect_tuned_profile)
rocm_ver=$(detect_rocm_version)
vulkan_drv=$(detect_vulkan_driver)
vulkan_ver=$(detect_vulkan_version)

# Raw GPU sensor readings.
gpu_temp=$(detect_gpu_temp)
gpu_power=$(detect_gpu_power)
gpu_busy=$(detect_gpu_busy)

# Recommended values reported next to the current settings.
rec_gttsize=$(recommended_gttsize_mib)
rec_pages=$(recommended_pages_limit)
|
||||
|
||||
# Toolbox list
# Encoded as a JSON array of names with python3, which this script already
# requires for the report itself, so a missing `jq` can no longer turn real
# toolboxes into "[]". The fallback replaces the whole value instead of
# appending "[]" to partial output, which would be invalid JSON.
# Blank lines in the name list are skipped.
toolboxes_json="[]"
if is_cmd toolbox; then
  toolboxes_json="$(
    detect_toolbox_names | python3 -c '
import json, sys
print(json.dumps([line.rstrip("\n") for line in sys.stdin if line.strip()]))
'
  )" || toolboxes_json="[]"
fi

# LLM stacks
stack_ollama="$(detect_stack_ollama)"
stack_lmstudio="$(detect_stack_lmstudio)"
stack_llamacpp="$(detect_stack_llamacpp)"
stack_opencode="$(detect_stack_opencode)"

# ROCm packages (first 30 lines)
# awk reads its whole input, so the producer never receives SIGPIPE; with
# `head -30` a longer list could fail the pipeline under `set -eo pipefail`
# and abort the script.
rocm_pkgs="$(detect_rocm_packages | awk 'NR <= 30')"
|
||||
|
||||
# ── Build JSON (all data via env vars — no shell interpolation into Python) ──
json_report="$(
  # The exports live only inside this command substitution's subshell; the
  # Python program reads every value from os.environ.
  export SR_TS="$ts"
  export SR_CPU_MODEL="$cpu_model" SR_CPU_CORES="$cpu_physical" SR_CPU_THREADS="$cpu_threads"
  export SR_GPU_NAME="$gpu_name" SR_GPU_DEVICE_ID="$gpu_device_id" SR_RAM_KB="$ram_kb"
  export SR_VRAM_TOTAL="$vram_total" SR_VRAM_USED="$vram_used"
  export SR_GTT_TOTAL="$gtt_total" SR_GTT_USED="$gtt_used"
  export SR_REC_GTTSIZE="$rec_gttsize" SR_REC_PAGES="$rec_pages"
  export SR_KERNEL="$kernel" SR_CMDLINE="$cmdline"
  export SR_PARAM_IOMMU="$param_iommu" SR_PARAM_GTTSIZE="$param_gttsize" SR_PARAM_PAGES="$param_pages"
  export SR_FIRMWARE="$firmware" SR_TUNED="$tuned" SR_ROCM="$rocm_ver"
  export SR_VULKAN_DRV="$vulkan_drv" SR_VULKAN_VER="${vulkan_ver:-}"
  export SR_GPU_TEMP="$gpu_temp" SR_GPU_POWER="$gpu_power" SR_GPU_BUSY="$gpu_busy"
  export SR_TOOLBOXES="$toolboxes_json"
  export SR_STACK_OLLAMA="$stack_ollama" SR_STACK_LMSTUDIO="$stack_lmstudio"
  export SR_STACK_LLAMACPP="$stack_llamacpp" SR_STACK_OPENCODE="$stack_opencode"

  python3 -c '
import json, os
e = os.environ
data = {
    "timestamp": e["SR_TS"],
    "hardware": {
        "cpu_model": e["SR_CPU_MODEL"],
        "cpu_cores": int(e["SR_CPU_CORES"]),
        "cpu_threads": int(e["SR_CPU_THREADS"]),
        "gpu_name": e["SR_GPU_NAME"],
        "gpu_device_id": e["SR_GPU_DEVICE_ID"],
        "system_ram_kb": int(e["SR_RAM_KB"]),
    },
    "memory": {
        "vram_total_bytes": int(e["SR_VRAM_TOTAL"]),
        "vram_used_bytes": int(e["SR_VRAM_USED"]),
        "gtt_total_bytes": int(e["SR_GTT_TOTAL"]),
        "gtt_used_bytes": int(e["SR_GTT_USED"]),
        "recommended_gttsize_mib": int(e["SR_REC_GTTSIZE"]),
        "recommended_pages_limit": int(e["SR_REC_PAGES"]),
    },
    "kernel": {
        "version": e["SR_KERNEL"],
        "cmdline": e["SR_CMDLINE"],
        "param_iommu": e["SR_PARAM_IOMMU"],
        "param_gttsize": e["SR_PARAM_GTTSIZE"],
        "param_pages_limit": e["SR_PARAM_PAGES"],
    },
    "firmware": e["SR_FIRMWARE"],
    "tuned_profile": e["SR_TUNED"],
    "rocm_version": e["SR_ROCM"],
    "vulkan": {
        "driver": e["SR_VULKAN_DRV"],
        "version": e["SR_VULKAN_VER"],
    },
    "sensors": {
        "gpu_temp_mc": int(e["SR_GPU_TEMP"]),
        "gpu_power_uw": int(e["SR_GPU_POWER"]),
        "gpu_busy_pct": int(e["SR_GPU_BUSY"]),
    },
    "toolboxes": json.loads(e["SR_TOOLBOXES"]),
    "stacks": {
        "ollama": e["SR_STACK_OLLAMA"],
        "lmstudio": e["SR_STACK_LMSTUDIO"],
        "llamacpp": e["SR_STACK_LLAMACPP"],
        "opencode": e["SR_STACK_OPENCODE"],
    },
}
print(json.dumps(data, indent=2))
'
)"
|
||||
|
||||
# --json mode: print the report (pretty-printed when json.tool succeeds,
# otherwise as built) and stop before anything is written to disk.
if [[ "$JSON_ONLY" == true ]]; then
  if ! python3 -m json.tool <<<"$json_report" 2>/dev/null; then
    printf '%s\n' "$json_report"
  fi
  exit 0
fi
|
||||
|
||||
# ── Save report ──────────────────────────────────────────
# Both output files share the same "report-<timestamp>" stem inside the
# directory returned by data_dir.
audit_dir="$(data_dir audits)"
report_stem="$audit_dir/report-${ts}"
json_file="${report_stem}.json"
text_file="${report_stem}.txt"

# Prefer json.tool's formatting; fall back to the JSON exactly as built.
if ! python3 -m json.tool <<<"$json_report" > "$json_file" 2>/dev/null; then
  printf '%s\n' "$json_report" > "$json_file"
fi
|
||||
|
||||
# ── Text output (also saved) ────────────────────────────

#######################################
# Divide a raw sensor reading and format it with one decimal place.
# printf keeps the leading zero ("0.5"), which `bc` with scale=1 drops
# (".5"). LC_ALL=C pins the decimal separator to ".".
# Arguments: $1 - raw numeric reading
#            $2 - divisor
# Outputs:   the scaled value without a newline, or "n/a" when $1 is not a
#            plain number (e.g. empty because the sensor was unreadable)
#######################################
_scale_1dp() {
  local raw="${1:-}" divisor="$2"
  local number_re='^-?[0-9]+(\.[0-9]+)?$'
  if [[ ! "$raw" =~ $number_re ]]; then
    printf 'n/a'
    return 0
  fi
  LC_ALL=C awk -v raw="$raw" -v div="$divisor" 'BEGIN { printf "%.1f", raw / div }'
}

# Everything printed inside the group goes to the terminal and, through tee,
# into the text report file.
{
  printf "Strix Halo Full System Report — %s\n" "$ts"
  # Prints "=" once per argument: a 60-character rule.
  printf "=%.0s" {1..60}; echo

  printf "\nHardware:\n"
  printf " CPU: %s (%sC/%sT)\n" "$cpu_model" "$cpu_physical" "$cpu_threads"
  printf " GPU: %s (device: 0x%s)\n" "$gpu_name" "$gpu_device_id"
  printf " RAM: %s KB\n" "$ram_kb"

  printf "\nMemory Allocation:\n"
  printf " VRAM total: %s (used: %s)\n" "$(human_bytes "$vram_total")" "$(human_bytes "$vram_used")"
  printf " GTT total: %s (used: %s)\n" "$(human_bytes "$gtt_total")" "$(human_bytes "$gtt_used")"
  printf " Recommended: gttsize=%s MiB, pages_limit=%s\n" "$rec_gttsize" "$rec_pages"

  printf "\nKernel:\n"
  printf " Version: %s\n" "$kernel"
  printf " Firmware: %s\n" "$firmware"
  printf " Cmdline: %s\n" "$cmdline"
  printf " iommu: %s\n" "${param_iommu:-not set}"
  printf " gttsize: %s\n" "${param_gttsize:-not set}"
  # Space after the colon so the value is not glued to the label.
  printf " pages_limit: %s\n" "${param_pages:-not set}"

  printf "\nPerformance:\n"
  printf " Tuned: %s\n" "$tuned"
  # Temperature is divided by 1000 and power by 1000000 before display.
  printf " GPU temp: %s C\n" "$(_scale_1dp "$gpu_temp" 1000)"
  printf " GPU power: %s W\n" "$(_scale_1dp "$gpu_power" 1000000)"
  printf " GPU busy: %s%%\n" "$gpu_busy"

  printf "\nSoftware:\n"
  printf " ROCm: %s\n" "$rocm_ver"
  printf " Vulkan: %s %s\n" "$vulkan_drv" "$vulkan_ver"

  printf "\nROCm Packages:\n"
  echo "$rocm_pkgs" | sed 's/^/ /'

  printf "\nToolboxes:\n"
  if [[ "$toolboxes_json" == "[]" ]]; then
    printf " none\n"
  else
    # Decode the JSON array and print one name per line.
    echo "$toolboxes_json" | python3 -c "import sys,json; [print(f' {x}') for x in json.load(sys.stdin)]" 2>/dev/null || printf " (parse error)\n"
  fi

  printf "\nLLM Stacks:\n"
  printf " ollama: %s\n" "$stack_ollama"
  printf " LM Studio: %s\n" "$stack_lmstudio"
  printf " llama.cpp: %s\n" "$stack_llamacpp"
  printf " opencode: %s\n" "$stack_opencode"
} | tee "$text_file"

echo ""
log_success "Report saved to:"
log_info " JSON: $json_file"
log_info " Text: $text_file"
|
||||
Reference in New Issue
Block a user