Files
strix-halo-optimizations/scripts/benchmark/run-suite.sh
Felipe Cardoso 7c8be55bfe fix: resolve model paths for toolbox container access
Toolbox containers mount host / at /run/host/ but only /home is directly
accessible. Models on /data/models/ need the /run/host/ prefix when passed
to llama-bench inside the container.

Both run-baseline.sh and run-suite.sh now resolve model paths with realpath
and prepend /run/host/ for non-home paths. Paths under /home/ are passed
as-is (already mounted directly).

Verified with smoke test: Qwen3.5-0.8B-Q8_0 → 8900 t/s pp512, 177 t/s tg128.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 19:17:16 +01:00

252 lines
8.3 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Full benchmark suite — run all backends × models with tagging
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"
source "$SCRIPT_DIR/../../lib/format.sh"
# --- Default settings; most can be overridden by the options parsed below ---

# Directory that is searched for *.gguf model files.
MODEL_DIR="$(data_dir models)"

# Label that prefixes the name of the results directory.
TAG="run"

# Selection filters: an empty string (or 0 for the size) disables the filter.
BACKENDS_FILTER=""
MODELS_FILTER=""
CATEGORY_FILTER=""
MAX_SIZE_GB=0

# When true, the long-context run is skipped for every model/backend pair.
SKIP_LONGCTX=false

# Repetition counts passed to llama-bench through -r.
REPS_STANDARD=5
REPS_LONGCTX=3
#######################################
# Parse the command-line options into the suite's global settings.
# Globals:   TAG, BACKENDS_FILTER, MODELS_FILTER, SKIP_LONGCTX, MAX_SIZE_GB,
#            CATEGORY_FILTER, REPS_STANDARD (written)
# Arguments: the script's argument list ("$@")
# Outputs:   usage text for --help; diagnostics through log_warn/log_error
# Returns:   0 when parsing completes. Exits 0 after --help and exits 1 when
#            an option is missing its value or a numeric value is malformed.
#            Unknown arguments are only warned about and then skipped.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Options that take a value: fail with a clear message instead of the
    # "$2: unbound variable" abort that set -u would otherwise produce.
    case "$1" in
      --tag|-t|--backends|-b|--models|-m|--max-size|-s|--category|-c|--reps|-r)
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --tag|-t) TAG="$2"; shift 2 ;;
      --backends|-b) BACKENDS_FILTER="$2"; shift 2 ;;
      --models|-m) MODELS_FILTER="$2"; shift 2 ;;
      --skip-longctx) SKIP_LONGCTX=true; shift ;;
      --max-size|-s)
        # MAX_SIZE_GB is later evaluated inside (( )), so only plain digits
        # are accepted; 10# keeps values such as "08" from being read as octal.
        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
          log_error "--max-size expects a whole number of GB, got: $2"
          exit 1
        fi
        MAX_SIZE_GB=$(( 10#$2 ))
        shift 2 ;;
      --category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
      --reps|-r)
        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
          log_error "--reps expects a whole number, got: $2"
          exit 1
        fi
        REPS_STANDARD="$2"
        shift 2 ;;
      --help|-h)
        echo "Usage: run-suite.sh [OPTIONS]"
        echo ""
        echo "Options:"
        echo " --tag NAME Tag this run (default: run)"
        echo " --backends LIST Comma-separated backend filter"
        echo " --models LIST Comma-separated model filename filter"
        echo " --skip-longctx Skip long-context (32K) tests"
        echo " --max-size GB Only bench models up to this file size in GB"
        echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
        echo " --reps N Standard test repetitions (default: 5)"
        exit 0 ;;
      *) log_warn "Unknown argument: $1"; shift ;;
    esac
  done
}
parse_args "$@"
# Every invocation writes into its own directory: <benchmarks dir>/<tag>-<timestamp>
TS="$(timestamp)"
RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
mkdir -p "$RESULT_DIR"
log_header "Benchmark Suite: $TAG"
log_info "Results: $RESULT_DIR"
# Save system state
# The report's stderr is discarded, so without an explicit message a failure
# here would end the suite (set -e) without any indication of the cause.
if ! bash "$SCRIPT_DIR/../audit/system-report.sh" --json > "$RESULT_DIR/system-state.json" 2>/dev/null; then
  log_error "Could not capture system state (audit/system-report.sh failed)"
  exit 1
fi
# Discover backends
existing="$(detect_toolbox_names 2>/dev/null || true)"

# llama-bench binary location inside each supported toolbox container.
declare -A BENCH_PATHS=(
  [llama-vulkan-radv]="/usr/sbin/llama-bench"
  [llama-vulkan-amdvlk]="/usr/sbin/llama-bench"
  [llama-rocm-6.4.4]="/usr/local/bin/llama-bench"
  [llama-rocm-7.2]="/usr/local/bin/llama-bench"
  [llama-rocm7-nightlies]="/usr/local/bin/llama-bench"
)

#######################################
# Decide whether a backend takes part in this run.
# Arguments: $1 - backend (toolbox) name
#            $2 - newline-separated list of existing toolbox names
#            $3 - comma-separated backend filter; empty selects every backend
# Returns:   0 when $1 is a whole line of $2 and, if a filter is given, is
#            one of its entries; 1 otherwise
#######################################
backend_enabled() {
  local name="$1" installed="$2"
  # Whitespace around filter entries ("a, b") is not significant.
  local filter="${3//[[:space:]]/}"
  # -x/-F: compare whole lines literally, so the dots in names such as
  # llama-rocm-6.4.4 are not treated as regex wildcards.
  grep -qxF -- "$name" <<<"$installed" || return 1
  [[ -z "$filter" || ",${filter}," == *",${name},"* ]]
}

available_backends=()
for tb in "${!BENCH_PATHS[@]}"; do
  if backend_enabled "$tb" "$existing" "$BACKENDS_FILTER"; then
    available_backends+=("$tb")
  fi
done
# Nothing to benchmark unless at least one backend survived the selection.
[[ ${#available_backends[@]} -gt 0 ]] || {
  log_error "No matching backends. Run: make benchmark-setup"
  exit 1
}
log_info "Backends: ${available_backends[*]}"
# Find and filter models
# Collect every *.gguf below MODEL_DIR (symlinks are followed), leaving out
# mmproj-* files and, for split models, every shard except *-00001-of-*.
# The resulting paths are sorted.
readarray -t ALL_MODEL_PATHS < <(
  find -L "$MODEL_DIR" -type f -name '*.gguf' ! -name 'mmproj-*' \
    \( ! -name '*-000*-of-*.gguf' -o -name '*-00001-of-*.gguf' \) |
    sort
)
#######################################
# Test whether a comma-separated list contains an entry.
# Arguments: $1 - comma-separated list
#            $2 - entry to look for
# Returns:   0 when $2 equals one whole entry of $1; 1 otherwise. All
#            whitespace is removed from both arguments before comparing, and
#            an empty entry never matches.
#######################################
csv_has_entry() {
  local list="${1//[[:space:]]/}" item="${2//[[:space:]]/}"
  [[ -n "$item" && ",${list}," == *",${item},"* ]]
}

#######################################
# Look up the category a catalog file assigns to a model file.
# The catalog is read as '|'-separated lines (name|repo|file|size|category|desc);
# lines whose first field is empty or starts with '#' are ignored.
# Arguments: $1 - model file name to look up (compared with the 3rd field)
#            $2 - path of the catalog file
# Outputs:   the 5th field of the first matching line, on stdout
# Returns:   0 when the file is listed; 1 when it is not or the catalog is
#            missing
#######################################
catalog_category() {
  local wanted="$1" catalog="$2" name file category
  [[ -f "$catalog" ]] || return 1
  # "|| [[ -n $name ]]" keeps a final line that lacks a trailing newline.
  while IFS='|' read -r name _ file _ category _ || [[ -n "$name" ]]; do
    [[ "$name" =~ ^#.*$ || -z "$name" ]] && continue
    if [[ "$file" == "$wanted" ]]; then
      printf '%s\n' "$category"
      return 0
    fi
  done < "$catalog"
  return 1
}

MODEL_PATHS=()
for p in "${ALL_MODEL_PATHS[@]}"; do
  local_name="$(basename "$p")"

  # Name filter: keep the model only when its file name occurs (ignoring
  # case) in one of the filter entries. -F makes the name a literal string
  # rather than a regex, so '.' and similar characters match themselves.
  if [[ -n "$MODELS_FILTER" ]]; then
    if ! tr ',' '\n' <<<"$MODELS_FILTER" | grep -qiF -- "$local_name"; then
      continue
    fi
  fi

  # Size filter: compare in bytes so a model of exactly MAX_SIZE_GB still
  # counts as "up to" the limit and sizes are not truncated before comparing.
  if (( MAX_SIZE_GB > 0 )); then
    file_size_bytes="$(stat -Lc%s "$p" 2>/dev/null || echo 0)"
    if (( file_size_bytes > MAX_SIZE_GB * 1073741824 )); then
      size_tenths=$(( file_size_bytes * 10 / 1073741824 ))
      log_info "Skipping $local_name ($(( size_tenths / 10 )).$(( size_tenths % 10 )) GB > ${MAX_SIZE_GB} GB limit)"
      continue
    fi
  fi

  # Category filter: only models listed in models.conf can be excluded;
  # models missing from the catalog (or a missing catalog) pass through.
  if [[ -n "$CATEGORY_FILTER" ]]; then
    if model_category="$(catalog_category "$local_name" "$PROJECT_ROOT/configs/models.conf")" \
      && ! csv_has_entry "$CATEGORY_FILTER" "$model_category"; then
      log_info "Skipping $local_name (category not in: $CATEGORY_FILTER)"
      continue
    fi
  fi

  MODEL_PATHS+=("$p")
done
# Stop when the filters left no model to benchmark.
(( ${#MODEL_PATHS[@]} )) || {
  log_error "No models matched filters. Run: make benchmark-setup"
  exit 1
}
log_info "Models: ${#MODEL_PATHS[@]}"
# Start metric logging
# The logger runs in the background for the whole suite and is stopped by the
# EXIT trap, whichever way the script ends.
METRICS_FILE="$RESULT_DIR/metrics.csv"
bash "$SCRIPT_DIR/../monitor/log-metrics.sh" --output "$METRICS_FILE" --interval 2 &
METRICS_PID=$!

# Signal the background logger and reap it; errors are ignored because the
# logger may already have exited.
stop_metrics_logger() {
  kill "$METRICS_PID" 2>/dev/null
  wait "$METRICS_PID" 2>/dev/null
}
trap stop_metrics_logger EXIT
# Run benchmarks (same logic as run-baseline.sh)

#######################################
# Translate a host model path into the path to use inside a toolbox.
# The path is resolved with realpath; results under /home/ are returned
# unchanged, everything else gets the /run/host prefix.
# Arguments: $1 - model path on the host
# Outputs:   the path to hand to llama-bench, on stdout
# Returns:   non-zero when realpath fails
#######################################
toolbox_model_path() {
  local resolved
  resolved="$(realpath "$1")" || return
  if [[ "$resolved" != /home/* ]]; then
    resolved="/run/host${resolved}"
  fi
  printf '%s\n' "$resolved"
}

#######################################
# Run one benchmark command and capture its output in a log file.
# Nothing is run when the log already exists and is non-empty. When the
# command fails, the line "FAILED" is appended to the log and the function
# still returns normally so the remaining runs continue.
# Globals:   BOLD, RESET (read)
# Arguments: $1 - log file, $2 - backend name, $3 - model name,
#            $4 - label shown in the progress line, $5... - command to run
# Outputs:   progress line and the last 3 log lines on stdout
#######################################
run_bench_case() {
  local out="$1" backend="$2" model="$3" label="$4"
  shift 4
  [[ -s "$out" ]] && return 0
  printf "\n${BOLD}>> [%s] %s — %s${RESET}\n" "$backend" "$model" "$label"
  if "$@" > "$out" 2>&1; then
    log_success "Done"
    tail -3 "$out"
  else
    log_error "Failed"
    echo "FAILED" >> "$out"
  fi
}

for MODEL_PATH in "${MODEL_PATHS[@]}"; do
  MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"
  # Resolve model path for toolbox (depends only on the model, so it is
  # computed once here rather than once per backend)
  TOOLBOX_MODEL_PATH="$(toolbox_model_path "$MODEL_PATH")"

  for BACKEND in "${available_backends[@]}"; do
    BENCH_BIN="${BENCH_PATHS[$BACKEND]}"
    BACKEND_SAFE="${BACKEND//[.-]/_}"
    ENV_ARGS=()
    if [[ "$BACKEND" == *rocm* ]]; then
      ENV_ARGS=(env ROCBLAS_USE_HIPBLASLT=1)
    fi

    # Part of the command line shared by both test kinds.
    BENCH_CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
      -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1)

    # Standard test
    run_bench_case "$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log" \
      "$BACKEND" "$MODEL_NAME" "standard" \
      "${BENCH_CMD[@]}" -r "$REPS_STANDARD"

    # Long-context test
    if $SKIP_LONGCTX; then
      continue
    fi
    UB_SIZE=2048
    if [[ "$BACKEND" == *vulkan* ]]; then
      UB_SIZE=512
    fi
    run_bench_case "$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log" \
      "$BACKEND" "$MODEL_NAME" "longctx" \
      "${BENCH_CMD[@]}" -p 2048 -n 32 -d 32768 -ub "$UB_SIZE" -r "$REPS_LONGCTX"
  done
done
# Parse results
SUMMARY="$RESULT_DIR/summary.json"

#######################################
# Parse llama-bench log files into summary JSON.
# Reads every *.log in the given directory, skips logs that contain the
# marker "FAILED", and turns each data row of the markdown result table into
# one entry of {"results": [...]}.
# The table's column set is not fixed (for example an n_ubatch column shows
# up in some runs), so the positions of model/size/backend/test/t/s are taken
# from each table's header row instead of being hard-coded.
# Arguments: $1 - directory containing the *.log files
# Outputs:   the JSON document on stdout
#######################################
parse_bench_logs() {
  python3 - "$1" << 'PYEOF'
import json
import re
import sys
from pathlib import Path

# Positions (after splitting a row on "|") used only when a log contains no
# usable header row.
FALLBACK_COLUMNS = {"model": 1, "size": 2, "backend": 4, "test": 8, "t/s": 9}
# A markdown separator row consists solely of pipes, dashes, colons, blanks.
SEPARATOR_ROW = re.compile(r"[|:\-\s]+")

results = []
for logfile in sorted(Path(sys.argv[1]).glob("*.log")):
    # Logs also hold the command's stderr; never fail on odd bytes.
    content = logfile.read_text(encoding="utf-8", errors="replace")
    if "FAILED" in content:
        continue
    columns = FALLBACK_COLUMNS
    for line in content.splitlines():
        line = line.strip()
        if not line.startswith("|") or SEPARATOR_ROW.fullmatch(line):
            continue
        parts = [p.strip() for p in line.split("|")]
        if len(parts) > 1 and parts[1].lower() == "model":
            # Header row: remember where each named column sits.
            header = {name.lower(): idx for idx, name in enumerate(parts) if name}
            if all(key in header for key in FALLBACK_COLUMNS):
                columns = header
            continue
        try:
            ts_raw = parts[columns["t/s"]]
            # The cell has the form "<mean> ± <deviation>"; keep the mean.
            ts_match = re.match(r"([\d.]+)", ts_raw)
            if not ts_match:
                continue
            results.append({
                "file": logfile.name,
                "model": parts[columns["model"]],
                "size": parts[columns["size"]],
                "backend": parts[columns["backend"]],
                "test": parts[columns["test"]],
                "tokens_per_sec": float(ts_match.group(1)),
                "raw": ts_raw,
            })
        except (ValueError, IndexError):
            # Row too short or number malformed: not a result row.
            continue

print(json.dumps({"results": results}, indent=2))
PYEOF
}

parse_bench_logs "$RESULT_DIR" > "$SUMMARY"
log_header "Results"
# Print the parsed summary as a fixed-width table: model and backend names
# are cut to 20 and 16 characters, throughput is shown with two decimals.
python3 - "$SUMMARY" << 'PYEOF'
import json
import sys

with open(sys.argv[1]) as handle:
    rows = json.load(handle)["results"]

if rows:
    row_fmt = " {:<20} {:<16} {:<8} {:>10}"
    print(row_fmt.format("Model", "Backend", "Test", "t/s"))
    print(" " + "-" * 58)
    for entry in rows:
        speed = format(entry["tokens_per_sec"], ".2f")
        print(row_fmt.format(entry["model"][:20], entry["backend"][:16], entry["test"], speed))
else:
    print(" No results parsed.")
PYEOF
echo ""
log_success "Results saved to: $RESULT_DIR"