Initial commit

Felipe Cardoso
2026-03-25 20:13:15 +01:00
commit c596e38e9e
26 changed files with 2345 additions and 0 deletions


@@ -0,0 +1,194 @@
#!/usr/bin/env bash
# Full benchmark suite — run all backends × models with tagging
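# Usage (filters are comma-separated, matched as substrings):
#   --tag|-t NAME       label for the result directory (default: run)
#   --backends|-b LIST  e.g. "rocm" or "vulkan-radv,rocm-7.2"
#   --models|-m LIST    e.g. "qwen" (case-insensitive match on the filename)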
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"
source "$SCRIPT_DIR/../../lib/format.sh"
MODEL_DIR="$(data_dir models)"
TAG="run"
BACKENDS_FILTER=""
MODELS_FILTER=""
while [[ $# -gt 0 ]]; do
case "$1" in
--tag|-t) TAG="$2"; shift 2 ;;
--backends|-b) BACKENDS_FILTER="$2"; shift 2 ;;
--models|-m) MODELS_FILTER="$2"; shift 2 ;;
*) shift ;;
esac
done
TS="$(timestamp)"
RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
mkdir -p "$RESULT_DIR"
REPS_STANDARD=5
REPS_LONGCTX=3
log_header "Benchmark Suite: $TAG"
log_info "Results: $RESULT_DIR"
# Save system state (best effort; don't let a failed report abort the suite under set -e)
bash "$SCRIPT_DIR/../audit/system-report.sh" --json > "$RESULT_DIR/system-state.json" 2>/dev/null || true
# Discover backends
existing="$(detect_toolbox_names 2>/dev/null || true)"
declare -A BENCH_PATHS=(
  [llama-vulkan-radv]="/usr/sbin/llama-bench"
  [llama-vulkan-amdvlk]="/usr/sbin/llama-bench"
  [llama-rocm-6.4.4]="/usr/local/bin/llama-bench"
  [llama-rocm-7.2]="/usr/local/bin/llama-bench"
  [llama-rocm7-nightlies]="/usr/local/bin/llama-bench"
)
available_backends=()
for tb in "${!BENCH_PATHS[@]}"; do
if echo "$existing" | grep -q "^${tb}$"; then
if [[ -z "$BACKENDS_FILTER" ]] || echo "$BACKENDS_FILTER" | tr ',' '\n' | grep -q "$tb"; then
available_backends+=("$tb")
fi
fi
done
if (( ${#available_backends[@]} == 0 )); then
log_error "No matching backends. Run: make benchmark-setup"
exit 1
fi
log_info "Backends: ${available_backends[*]}"
# Find models
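# Keep single-file models plus only the first shard of split models
# (e.g. foo-00001-of-00003.gguf is kept, foo-00002-of-00003.gguf is not);
# llama.cpp loads the remaining shards automatically from the first one.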
mapfile -t MODEL_PATHS < <(
find "$MODEL_DIR" -type f -name '*.gguf' \
\( -name '*-00001-of-*.gguf' -o -not -name '*-000*-of-*.gguf' \) \
| sort
)
if [[ -n "$MODELS_FILTER" ]]; then
  filtered=()
  for p in "${MODEL_PATHS[@]}"; do
    name="$(basename "$p")"
    # keep the model if any comma-separated filter entry appears in its filename
    if echo "$name" | grep -qiFf <(echo "$MODELS_FILTER" | tr ',' '\n'); then
      filtered+=("$p")
    fi
  done
  MODEL_PATHS=("${filtered[@]}")
fi
if (( ${#MODEL_PATHS[@]} == 0 )); then
log_error "No models found. Run: make benchmark-setup"
exit 1
fi
log_info "Models: ${#MODEL_PATHS[@]}"
# Start metric logging
METRICS_FILE="$RESULT_DIR/metrics.csv"
bash "$SCRIPT_DIR/../monitor/log-metrics.sh" --output "$METRICS_FILE" --interval 2 &
METRICS_PID=$!
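# stop the background logger when the suite exits, whether it succeeds or fails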
trap 'kill "$METRICS_PID" 2>/dev/null; wait "$METRICS_PID" 2>/dev/null' EXIT
# Run benchmarks (same logic as run-baseline.sh)
for MODEL_PATH in "${MODEL_PATHS[@]}"; do
MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"
for BACKEND in "${available_backends[@]}"; do
BENCH_BIN="${BENCH_PATHS[$BACKEND]}"
BACKEND_SAFE="${BACKEND//[.-]/_}"
ENV_ARGS=()
[[ "$BACKEND" == *rocm* ]] && ENV_ARGS=(env ROCBLAS_USE_HIPBLASLT=1)
    # Standard test
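    # No -p/-n given, so llama-bench runs its default prompt-processing and
    # token-generation tests (pp512 / tg128 in current builds).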
OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log"
if [[ ! -s "$OUT" ]]; then
printf "\n${BOLD}>> [%s] %s — standard${RESET}\n" "$BACKEND" "$MODEL_NAME"
CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
-ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1 -r "$REPS_STANDARD")
if "${CMD[@]}" > "$OUT" 2>&1; then
log_success "Done"; tail -3 "$OUT"
else
log_error "Failed"; echo "FAILED" >> "$OUT"
fi
fi
    # Long-context test
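    # -p 2048 -n 32 -d 32768: measure a 2048-token prompt and 32 generated
    # tokens with the context already filled to a depth of 32768 tokens.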
OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
if [[ ! -s "$OUT_LC" ]]; then
printf "\n${BOLD}>> [%s] %s — longctx${RESET}\n" "$BACKEND" "$MODEL_NAME"
UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
-ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1
-p 2048 -n 32 -d 32768 -ub "$UB_SIZE" -r "$REPS_LONGCTX")
if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
log_success "Done"; tail -3 "$OUT_LC"
else
log_error "Failed"; echo "FAILED" >> "$OUT_LC"
fi
fi
done
done
# Parse llama-bench log files into summary JSON
SUMMARY="$RESULT_DIR/summary.json"
python3 - "$RESULT_DIR" > "$SUMMARY" << 'PYEOF'
import sys, os, re, json
from pathlib import Path
result_dir = Path(sys.argv[1])
results = []
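# Each log holds a markdown table emitted by llama-bench; data rows look
# roughly like:
#   | model | size | params | backend | ngl | ... | test | t/s |
# with the test name and tokens/sec in the last two populated columns.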
for logfile in sorted(result_dir.glob("*.log")):
    content = logfile.read_text()
    if "FAILED" in content:
        continue
    for line in content.splitlines():
        line = line.strip()
        # skip anything that is not a table row, plus the header row itself
        if not line.startswith("|") or ("model" in line.lower() and "size" in line.lower()):
            continue
        if "---" in line:
            continue
        parts = [p.strip() for p in line.split("|")]
        if len(parts) < 10:
            continue
        try:
            # take the test name and t/s from the end of the row, so extra
            # parameter columns (fa, mmap, ...) don't shift the indices
            test_type = parts[-3]
            ts_raw = parts[-2]
            ts_match = re.match(r'([\d.]+)', ts_raw)
            if not ts_match:
                continue
            results.append({
                "file": logfile.name,
                "model": parts[1],
                "size": parts[2],
                "backend": parts[4],
                "test": test_type,
                "tokens_per_sec": float(ts_match.group(1)),
                "raw": ts_raw,
            })
        except (ValueError, IndexError):
            continue
print(json.dumps({"results": results}, indent=2))
PYEOF
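# summary.json shape (one entry per parsed table row):
#   {"results": [{"file": ..., "model": ..., "size": ..., "backend": ...,
#                 "test": ..., "tokens_per_sec": ..., "raw": ...}]}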
log_header "Results"
python3 - "$SUMMARY" << 'PYEOF'
import sys, json
with open(sys.argv[1]) as f:
    data = json.load(f)
if not data["results"]:
    print(" No results parsed.")
    sys.exit(0)
fmt = " {:<20} {:<16} {:<8} {:>10}"
print(fmt.format("Model", "Backend", "Test", "t/s"))
print(" " + "-" * 58)
for r in data["results"]:
    print(fmt.format(r["model"][:20], r["backend"][:16], r["test"], f"{r['tokens_per_sec']:.2f}"))
PYEOF
echo ""
log_success "Results saved to: $RESULT_DIR"