Files
strix-halo-optimizations/scripts/audit/system-report.sh
Felipe Cardoso c596e38e9e Initial commit
2026-03-25 20:13:15 +01:00

195 lines
7.0 KiB
Bash

#!/usr/bin/env bash
# system-report.sh — detailed system audit, produced as JSON and as plain text.
#
# Usage: system-report.sh [--json]
#   --json   selects JSON-only mode; recognised only as the first argument.
set -euo pipefail

# Locate this script on disk so the shared libraries resolve regardless of the
# directory the caller happens to be in.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Load the project libraries in a fixed order: common, detect, format.
for _sr_lib in common detect format; do
  # shellcheck source=/dev/null
  source "$SCRIPT_DIR/../../lib/${_sr_lib}.sh"
done
unset _sr_lib

# JSON_ONLY holds the literal word "true" or "false".
case "${1:-}" in
  --json) JSON_ONLY=true ;;
  *) JSON_ONLY=false ;;
esac
# ── Collect every data point up front ────────────────────
# Every helper called here comes from the sourced lib files; its stdout is
# captured as-is. The order of the calls is kept exactly as before.
ts=$(timestamp)

# CPU and GPU identity
cpu_model=$(detect_cpu_model)
# NOTE(review): reported as the thread count ("T") further down — presumably
# detect_cpu_cores returns logical CPUs; confirm in lib/detect.sh.
cpu_threads=$(detect_cpu_cores)
cpu_physical=$(detect_cpu_physical)
gpu_name=$(detect_gpu_name)
gpu_device_id=$(detect_gpu_device_id)

# Kernel and firmware
kernel=$(detect_kernel_version)
firmware=$(detect_firmware_version)

# Memory figures
vram_total=$(detect_vram_total)
vram_used=$(detect_vram_used)
gtt_total=$(detect_gtt_total)
gtt_used=$(detect_gtt_used)
ram_kb=$(detect_system_ram_kb)

# Kernel command line: selected parameters plus the full line
param_iommu=$(detect_kernel_param iommu)
param_gttsize=$(detect_gttsize_param)
param_pages=$(detect_pages_limit_param)
cmdline=$(</proc/cmdline)

# Tuning profile and driver stack
tuned=$(detect_tuned_profile)
rocm_ver=$(detect_rocm_version)
vulkan_drv=$(detect_vulkan_driver)
vulkan_ver=$(detect_vulkan_version)

# Live GPU sensor readings
gpu_temp=$(detect_gpu_temp)
gpu_power=$(detect_gpu_power)
gpu_busy=$(detect_gpu_busy)

# Recommended memory settings
rec_gttsize=$(recommended_gttsize_mib)
rec_pages=$(recommended_pages_limit)
# Toolbox list

#######################################
# Turn newline-separated names into a compact JSON array.
# Blank lines are skipped. Uses python3, which this script already needs for
# the JSON report, so no separate jq dependency is required.
# Arguments: none (reads names from stdin)
# Outputs:   one JSON array on stdout, e.g. ["a", "b"] or []
# Returns:   python3's exit status
#######################################
sr_lines_to_json_array() {
  python3 -c '
import json, sys
print(json.dumps([ln for ln in sys.stdin.read().splitlines() if ln.strip()]))
'
}

toolboxes_json="[]"
if is_cmd toolbox; then
  # Capture the names first and ignore the detector's exit status: whatever it
  # printed is still worth reporting. Converting in a separate step guarantees
  # toolboxes_json is always exactly one JSON array. The old one-line pipeline
  # could emit "[]" twice when the detector failed under pipefail, which then
  # broke JSON parsing later in the script.
  _sr_toolbox_names="$(detect_toolbox_names)" || true
  if _sr_toolbox_array="$(sr_lines_to_json_array <<<"$_sr_toolbox_names" 2>/dev/null)"; then
    toolboxes_json="$_sr_toolbox_array"
  fi
fi
# LLM stacks — status string reported by each detector in the sourced libs
stack_ollama="$(detect_stack_ollama)"
stack_lmstudio="$(detect_stack_lmstudio)"
stack_llamacpp="$(detect_stack_llamacpp)"
stack_opencode="$(detect_stack_opencode)"

# ROCm packages — keep only the first 30 lines.
# awk reads its input to the end, whereas `head -30` exits after line 30 and
# can kill the producer with SIGPIPE. Under `set -o pipefail` plus `set -e`
# that failed pipeline would abort the whole script on a long package list.
rocm_pkgs="$(detect_rocm_packages | awk 'NR <= 30')"
# ── Build JSON (all data via env vars — no shell interpolation into Python) ──

#######################################
# Print the full report as indented JSON.
# Every value is handed to Python through an SR_* environment variable, so no
# shell text is ever spliced into the Python source.
# Numeric fields that are empty or not an integer are written as null, and a
# toolbox list that is not a single JSON array is written as []. Each case
# warns on stderr, so one unreadable value no longer aborts the whole report.
# Globals (read): ts, cpu_model, cpu_physical, cpu_threads, gpu_name,
#   gpu_device_id, ram_kb, vram_total, vram_used, gtt_total, gtt_used,
#   rec_gttsize, rec_pages, kernel, cmdline, param_iommu, param_gttsize,
#   param_pages, firmware, tuned, rocm_ver, vulkan_drv, vulkan_ver, gpu_temp,
#   gpu_power, gpu_busy, toolboxes_json, stack_ollama, stack_lmstudio,
#   stack_llamacpp, stack_opencode
# Outputs:  JSON document on stdout; warnings on stderr
# Returns:  python3's exit status
#######################################
sr_build_json() {
  SR_TS="${ts:-}" \
  SR_CPU_MODEL="${cpu_model:-}" SR_CPU_CORES="${cpu_physical:-}" SR_CPU_THREADS="${cpu_threads:-}" \
  SR_GPU_NAME="${gpu_name:-}" SR_GPU_DEVICE_ID="${gpu_device_id:-}" SR_RAM_KB="${ram_kb:-}" \
  SR_VRAM_TOTAL="${vram_total:-}" SR_VRAM_USED="${vram_used:-}" \
  SR_GTT_TOTAL="${gtt_total:-}" SR_GTT_USED="${gtt_used:-}" \
  SR_REC_GTTSIZE="${rec_gttsize:-}" SR_REC_PAGES="${rec_pages:-}" \
  SR_KERNEL="${kernel:-}" SR_CMDLINE="${cmdline:-}" \
  SR_PARAM_IOMMU="${param_iommu:-}" SR_PARAM_GTTSIZE="${param_gttsize:-}" SR_PARAM_PAGES="${param_pages:-}" \
  SR_FIRMWARE="${firmware:-}" SR_TUNED="${tuned:-}" SR_ROCM="${rocm_ver:-}" \
  SR_VULKAN_DRV="${vulkan_drv:-}" SR_VULKAN_VER="${vulkan_ver:-}" \
  SR_GPU_TEMP="${gpu_temp:-}" SR_GPU_POWER="${gpu_power:-}" SR_GPU_BUSY="${gpu_busy:-}" \
  SR_TOOLBOXES="${toolboxes_json:-}" \
  SR_STACK_OLLAMA="${stack_ollama:-}" SR_STACK_LMSTUDIO="${stack_lmstudio:-}" \
  SR_STACK_LLAMACPP="${stack_llamacpp:-}" SR_STACK_OPENCODE="${stack_opencode:-}" \
  python3 -c '
import json, os, sys

env = os.environ


def text(key):
    return env.get(key, "")


def integer(key):
    # int() rejects empty or non-numeric text; record null instead of crashing.
    raw = env.get(key, "").strip()
    try:
        return int(raw)
    except ValueError:
        print("system-report: %s=%r is not an integer; writing null" % (key, raw),
              file=sys.stderr)
        return None


def name_list(key):
    # Expect exactly one JSON array; anything else degrades to an empty list.
    raw = env.get(key, "")
    try:
        parsed = json.loads(raw or "[]")
    except ValueError:
        print("system-report: %s is not valid JSON; writing []" % key,
              file=sys.stderr)
        return []
    return parsed if isinstance(parsed, list) else []


data = {
    "timestamp": text("SR_TS"),
    "hardware": {
        "cpu_model": text("SR_CPU_MODEL"),
        "cpu_cores": integer("SR_CPU_CORES"),
        "cpu_threads": integer("SR_CPU_THREADS"),
        "gpu_name": text("SR_GPU_NAME"),
        "gpu_device_id": text("SR_GPU_DEVICE_ID"),
        "system_ram_kb": integer("SR_RAM_KB"),
    },
    "memory": {
        "vram_total_bytes": integer("SR_VRAM_TOTAL"),
        "vram_used_bytes": integer("SR_VRAM_USED"),
        "gtt_total_bytes": integer("SR_GTT_TOTAL"),
        "gtt_used_bytes": integer("SR_GTT_USED"),
        "recommended_gttsize_mib": integer("SR_REC_GTTSIZE"),
        "recommended_pages_limit": integer("SR_REC_PAGES"),
    },
    "kernel": {
        "version": text("SR_KERNEL"),
        "cmdline": text("SR_CMDLINE"),
        "param_iommu": text("SR_PARAM_IOMMU"),
        "param_gttsize": text("SR_PARAM_GTTSIZE"),
        "param_pages_limit": text("SR_PARAM_PAGES"),
    },
    "firmware": text("SR_FIRMWARE"),
    "tuned_profile": text("SR_TUNED"),
    "rocm_version": text("SR_ROCM"),
    "vulkan": {
        "driver": text("SR_VULKAN_DRV"),
        "version": text("SR_VULKAN_VER"),
    },
    "sensors": {
        "gpu_temp_mc": integer("SR_GPU_TEMP"),
        "gpu_power_uw": integer("SR_GPU_POWER"),
        "gpu_busy_pct": integer("SR_GPU_BUSY"),
    },
    "toolboxes": name_list("SR_TOOLBOXES"),
    "stacks": {
        "ollama": text("SR_STACK_OLLAMA"),
        "lmstudio": text("SR_STACK_LMSTUDIO"),
        "llamacpp": text("SR_STACK_LLAMACPP"),
        "opencode": text("SR_STACK_OPENCODE"),
    },
}
print(json.dumps(data, indent=2))
'
}

json_report="$(sr_build_json)"
# JSON-only mode: print the report and stop before anything is written to disk.
# json.tool re-indents the document; if it fails, the report is printed as-is.
if $JSON_ONLY; then
  if ! printf '%s\n' "$json_report" | python3 -m json.tool 2>/dev/null; then
    printf '%s\n' "$json_report"
  fi
  exit 0
fi
# ── Persist the report ───────────────────────────────────
# Both output files share the run's timestamp and live in the audits directory
# returned by data_dir.
audit_dir=$(data_dir audits)
json_file="${audit_dir}/report-${ts}.json"
text_file="${audit_dir}/report-${ts}.txt"

# Store the json.tool-formatted document; if formatting fails, store it as-is.
if ! printf '%s\n' "$json_report" | python3 -m json.tool >"$json_file" 2>/dev/null; then
  printf '%s\n' "$json_report" >"$json_file"
fi
# ── Text output (also saved) ────────────────────────────

#######################################
# Print value/divisor truncated to one decimal place.
# Uses shell arithmetic only, so there is no dependency on bc. Values below 1
# keep their leading zero ("0.5" rather than bc's ".5"), and a value that is
# not an integer prints "n/a" instead of an empty field.
# Arguments: $1 - integer value (may be empty or non-numeric)
#            $2 - positive integer divisor
# Outputs:   the formatted number, or "n/a", on stdout (no trailing newline)
# Returns:   0
#######################################
sr_fixed1() {
  local raw="${1:-}" divisor="$2" sign="" magnitude
  [[ "$raw" =~ ^-?[0-9]+$ ]] || { printf 'n/a'; return 0; }
  if [[ "$raw" == -* ]]; then
    sign="-"
  fi
  # Force base 10 so a value with leading zeros is not parsed as octal.
  magnitude=$(( 10#${raw#-} ))
  printf '%s%d.%d' "$sign" \
    "$(( magnitude / divisor ))" "$(( magnitude % divisor * 10 / divisor ))"
}

#######################################
# Print the human-readable report built from the gathered globals.
# Globals (read): ts, cpu_*, gpu_*, ram_kb, vram_*, gtt_*, rec_*, kernel,
#   firmware, cmdline, param_*, tuned, rocm_ver, vulkan_*, rocm_pkgs,
#   toolboxes_json, stack_*
# Outputs: the report on stdout
#######################################
sr_render_text() {
  printf "Strix Halo Full System Report — %s\n" "$ts"
  printf "=%.0s" {1..60}; echo
  printf "\nHardware:\n"
  printf " CPU: %s (%sC/%sT)\n" "$cpu_model" "$cpu_physical" "$cpu_threads"
  printf " GPU: %s (device: 0x%s)\n" "$gpu_name" "$gpu_device_id"
  printf " RAM: %s KB\n" "$ram_kb"
  printf "\nMemory Allocation:\n"
  printf " VRAM total: %s (used: %s)\n" "$(human_bytes "$vram_total")" "$(human_bytes "$vram_used")"
  printf " GTT total: %s (used: %s)\n" "$(human_bytes "$gtt_total")" "$(human_bytes "$gtt_used")"
  printf " Recommended: gttsize=%s MiB, pages_limit=%s\n" "$rec_gttsize" "$rec_pages"
  printf "\nKernel:\n"
  printf " Version: %s\n" "$kernel"
  printf " Firmware: %s\n" "$firmware"
  printf " Cmdline: %s\n" "$cmdline"
  printf " iommu: %s\n" "${param_iommu:-not set}"
  printf " gttsize: %s\n" "${param_gttsize:-not set}"
  # A space now follows the colon, matching every other label.
  printf " pages_limit: %s\n" "${param_pages:-not set}"
  printf "\nPerformance:\n"
  printf " Tuned: %s\n" "$tuned"
  # Divisors match the JSON field names: gpu_temp_mc / 1000, gpu_power_uw / 1000000.
  printf " GPU temp: %s C\n" "$(sr_fixed1 "$gpu_temp" 1000)"
  printf " GPU power: %s W\n" "$(sr_fixed1 "$gpu_power" 1000000)"
  printf " GPU busy: %s%%\n" "$gpu_busy"
  printf "\nSoftware:\n"
  printf " ROCm: %s\n" "$rocm_ver"
  printf " Vulkan: %s %s\n" "$vulkan_drv" "$vulkan_ver"
  printf "\nROCm Packages:\n"
  if [[ -n "$rocm_pkgs" ]]; then
    printf '%s\n' "$rocm_pkgs" | sed 's/^/ /'
  else
    # An empty list used to print a lone indented blank line.
    printf " none\n"
  fi
  printf "\nToolboxes:\n"
  if [[ "$toolboxes_json" == "[]" ]]; then
    printf " none\n"
  else
    printf '%s\n' "$toolboxes_json" \
      | python3 -c 'import sys, json
for name in json.load(sys.stdin):
    print(f" {name}")' 2>/dev/null \
      || printf " (parse error)\n"
  fi
  printf "\nLLM Stacks:\n"
  printf " ollama: %s\n" "$stack_ollama"
  printf " LM Studio: %s\n" "$stack_lmstudio"
  printf " llama.cpp: %s\n" "$stack_llamacpp"
  printf " opencode: %s\n" "$stack_opencode"
}

# Show the report on the terminal and keep an identical copy in $text_file.
sr_render_text | tee "$text_file"
# Blank separator line, then report where both copies of the report were written.
printf '\n'
log_success "Report saved to:"
log_info " JSON: ${json_file}"
log_info " Text: ${text_file}"