Initial commit
This commit is contained in:
140
scripts/benchmark/compare.sh
Normal file
140
scripts/benchmark/compare.sh
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env bash
# Compare two benchmark runs side-by-side.
#
# Usage: benchmark compare <before-dir> <after-dir>
# Both directories must contain a summary.json (produced by run-baseline.sh or
# run-suite.sh). If both also contain system-state.json, configuration
# differences are printed before the performance table.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/format.sh"

# Print run directories under $1, one per line, or "  (none)".
# Globs instead of parsing `ls` output (which breaks on odd filenames).
list_runs() {
  local base="$1" d found=0
  for d in "$base"/*/; do
    [[ -d "$d" ]] || continue
    printf '%s\n' "${d%/}"
    found=1
  done
  (( found == 1 )) || echo "  (none)"
}

if [[ $# -lt 2 ]]; then
  echo "Usage: benchmark compare <before-dir> <after-dir>"
  echo ""
  echo "Examples:"
  echo "  bin/benchmark compare data/baselines/20260325-120000 data/benchmarks/post-opt-20260326-100000"
  echo ""
  echo "Available baselines:"
  list_runs "$(data_dir baselines)"
  echo ""
  echo "Available benchmark runs:"
  list_runs "$(data_dir benchmarks)"
  exit 1
fi

BEFORE_DIR="$1"
AFTER_DIR="$2"

# Both runs must have parsed summaries before anything can be compared.
for d in "$BEFORE_DIR" "$AFTER_DIR"; do
  if [[ ! -f "$d/summary.json" ]]; then
    log_error "No summary.json in $d"
    exit 1
  fi
done

log_header "Benchmark Comparison"

# Directory basenames double as run labels (they embed the tag/timestamp).
before_name="$(basename "$BEFORE_DIR")"
after_name="$(basename "$AFTER_DIR")"
log_info "Before: $before_name"
log_info "After: $after_name"

# Show system state diff if available
if [[ -f "$BEFORE_DIR/system-state.json" ]] && [[ -f "$AFTER_DIR/system-state.json" ]]; then
  echo ""
  python3 - "$BEFORE_DIR/system-state.json" "$AFTER_DIR/system-state.json" << 'PYEOF'
import sys, json

with open(sys.argv[1]) as f:
    before = json.load(f)
with open(sys.argv[2]) as f:
    after = json.load(f)

changes = []

# Key memory config differences (reported in GiB).
b_mem = before.get("memory", {})
a_mem = after.get("memory", {})
if b_mem.get("vram_total_bytes") != a_mem.get("vram_total_bytes"):
    bv = b_mem.get("vram_total_bytes", 0) / 2**30
    av = a_mem.get("vram_total_bytes", 0) / 2**30
    changes.append(f"  VRAM: {bv:.1f} GiB -> {av:.1f} GiB")
if b_mem.get("gtt_total_bytes") != a_mem.get("gtt_total_bytes"):
    bg = b_mem.get("gtt_total_bytes", 0) / 2**30
    ag = a_mem.get("gtt_total_bytes", 0) / 2**30
    changes.append(f"  GTT: {bg:.1f} GiB -> {ag:.1f} GiB")

# Kernel parameters that affect GPU memory behaviour.
b_kern = before.get("kernel", {})
a_kern = after.get("kernel", {})
for param in ["param_iommu", "param_gttsize", "param_pages_limit"]:
    bv = b_kern.get(param, "")
    av = a_kern.get(param, "")
    if bv != av:
        changes.append(f"  {param}: '{bv}' -> '{av}'")

bt = before.get("tuned_profile", "")
at = after.get("tuned_profile", "")
if bt != at:
    changes.append(f"  tuned: {bt} -> {at}")

if changes:
    print("  Configuration changes:")
    for c in changes:
        print(c)
else:
    print("  No configuration changes detected")
PYEOF
fi

# Compare results
echo ""
python3 - "$BEFORE_DIR/summary.json" "$AFTER_DIR/summary.json" << 'PYEOF'
import sys, json

with open(sys.argv[1]) as f:
    before = json.load(f)
with open(sys.argv[2]) as f:
    after = json.load(f)

# Index tokens/sec by the (model, backend, test) triple so runs with
# different orderings or partial coverage can still be joined.
def index_results(data):
    idx = {}
    for r in data.get("results", []):
        key = (r["model"], r["backend"], r["test"])
        idx[key] = r["tokens_per_sec"]
    return idx

b_idx = index_results(before)
a_idx = index_results(after)

all_keys = sorted(set(b_idx.keys()) | set(a_idx.keys()))

if not all_keys:
    print("  No comparable results found.")
    sys.exit(0)

fmt = "  {:<18} {:<14} {:<7} {:>9} {:>9} {:>8}"
print(fmt.format("Model", "Backend", "Test", "Before", "After", "Delta"))
print("  " + "-" * 70)

for key in all_keys:
    model, backend, test = key
    b_val = b_idx.get(key)
    a_val = a_idx.get(key)

    # Explicit None checks: a legitimate 0.0 t/s result must render as a
    # number, not as "missing" (truthiness would silently drop it).
    b_str = f"{b_val:.1f}" if b_val is not None else "—"
    a_str = f"{a_val:.1f}" if a_val is not None else "—"

    # Delta needs both values and a non-zero baseline (avoid ZeroDivisionError).
    if b_val is not None and a_val is not None and b_val != 0:
        delta_pct = (a_val - b_val) / b_val * 100
        if delta_pct > 0:
            d_str = f"\033[32m+{delta_pct:.1f}%\033[0m"   # green = improvement
        elif delta_pct < 0:
            d_str = f"\033[31m{delta_pct:.1f}%\033[0m"    # red = regression
        else:
            d_str = "0.0%"
    else:
        d_str = "—"

    print(fmt.format(model[:18], backend[:14], test, b_str, a_str, d_str))

print()
PYEOF
|
||||
223
scripts/benchmark/run-baseline.sh
Normal file
223
scripts/benchmark/run-baseline.sh
Normal file
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env bash
# Capture pre-optimization baseline benchmark.
#
# Runs llama-bench (standard + long-context tests) for every available toolbox
# backend × every GGUF model found, logs system metrics in the background, and
# parses all logs into summary.json for later use with compare.sh.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"
source "$SCRIPT_DIR/../../lib/format.sh"

MODEL_DIR="$(data_dir models)"
TS="$(timestamp)"
RESULT_DIR="$(data_dir baselines)/$TS"
mkdir -p "$RESULT_DIR"

# Repetitions per llama-bench invocation (-r); long-context runs are slower,
# so they get fewer reps.
REPS_STANDARD=5
REPS_LONGCTX=3

log_header "Baseline Benchmark Capture"
log_info "Results will be saved to: $RESULT_DIR"

# ── 1. Save system state ────────────────────────────────
log_info "Capturing system state..."
bash "$SCRIPT_DIR/../audit/system-report.sh" --json > "$RESULT_DIR/system-state.json" 2>/dev/null

# ── 2. Discover available toolboxes and models ──────────
existing="$(detect_toolbox_names 2>/dev/null || true)"

# Map toolbox names to llama-bench commands (same pattern as upstream)
declare -A BENCH_PATHS=(
  [llama-vulkan-radv]="/usr/sbin/llama-bench"
  [llama-vulkan-amdvlk]="/usr/sbin/llama-bench"
  [llama-rocm-6.4.4]="/usr/local/bin/llama-bench"
  [llama-rocm-7.2]="/usr/local/bin/llama-bench"
  [llama-rocm7-nightlies]="/usr/local/bin/llama-bench"
)

available_backends=()
for tb in "${!BENCH_PATHS[@]}"; do
  # -Fx = fixed-string, whole-line match. Toolbox names contain dots, which
  # are regex metacharacters and would match any character with plain grep.
  if echo "$existing" | grep -Fxq "$tb"; then
    available_backends+=("$tb")
    log_success "Backend: $tb"
  fi
done

if (( ${#available_backends[@]} == 0 )); then
  log_error "No toolbox backends found. Run: make benchmark-setup"
  exit 1
fi

# Find models: keep only the first shard of multi-part GGUFs plus all
# single-file GGUFs (llama-bench loads the remaining shards itself).
mapfile -t MODEL_PATHS < <(
  find "$MODEL_DIR" -type f -name '*.gguf' \
    \( -name '*-00001-of-*.gguf' -o -not -name '*-000*-of-*.gguf' \) \
    | sort
)

if (( ${#MODEL_PATHS[@]} == 0 )); then
  log_error "No GGUF models found in $MODEL_DIR. Run: make benchmark-setup"
  exit 1
fi

log_info "Found ${#MODEL_PATHS[@]} model(s):"
for p in "${MODEL_PATHS[@]}"; do
  printf "  %s (%s)\n" "$(basename "$p")" "$(du -h "$p" | cut -f1)"
done

# ── 3. Start metric logging ─────────────────────────────
METRICS_FILE="$RESULT_DIR/metrics.csv"
bash "$SCRIPT_DIR/../monitor/log-metrics.sh" --output "$METRICS_FILE" --interval 2 &
METRICS_PID=$!
log_info "Metric logger started (PID: $METRICS_PID)"

# Stop the metric logger on any exit path (success, error, or signal).
cleanup() {
  kill "$METRICS_PID" 2>/dev/null || true
  wait "$METRICS_PID" 2>/dev/null || true
}
trap cleanup EXIT

# ── 4. Run benchmarks ───────────────────────────────────
for MODEL_PATH in "${MODEL_PATHS[@]}"; do
  MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"

  for BACKEND in "${available_backends[@]}"; do
    BENCH_BIN="${BENCH_PATHS[$BACKEND]}"
    BACKEND_SAFE="${BACKEND//[.-]/_}"   # filename-safe backend label

    # Build environment args for ROCm backends
    ENV_ARGS=()
    if [[ "$BACKEND" == *rocm* ]]; then
      ENV_ARGS=(env ROCBLAS_USE_HIPBLASLT=1)
    fi

    # Standard test (pp512 + tg128, default context)
    OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log"
    if [[ ! -s "$OUT" ]]; then
      printf '\n%s>> [%s] %s — standard test%s\n' "$BOLD" "$BACKEND" "$MODEL_NAME" "$RESET"
      # ${ENV_ARGS[@]+...} guard: expanding an empty array unguarded errors
      # under `set -u` on bash < 4.4.
      CMD=(toolbox run -c "$BACKEND" -- ${ENV_ARGS[@]+"${ENV_ARGS[@]}"} "$BENCH_BIN"
           -ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1 -r "$REPS_STANDARD")

      printf "  cmd: %s\n" "${CMD[*]}"
      if "${CMD[@]}" > "$OUT" 2>&1; then
        log_success "Standard test complete"
        tail -5 "$OUT"
      else
        rc=$?
        log_error "Standard test failed (exit $rc)"
        echo "FAILED" >> "$OUT"   # marker so the parser skips this log
      fi
    else
      log_info "Skipping standard test (log exists): $OUT"
    fi

    # Long-context test (pp2048, tg32, ctx 32768)
    OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
    if [[ ! -s "$OUT_LC" ]]; then
      printf '\n%s>> [%s] %s — long-context test%s\n' "$BOLD" "$BACKEND" "$MODEL_NAME" "$RESET"

      # Vulkan backends need a smaller micro-batch at long context.
      UB_SIZE=2048
      [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512

      CMD_LC=(toolbox run -c "$BACKEND" -- ${ENV_ARGS[@]+"${ENV_ARGS[@]}"} "$BENCH_BIN"
              -ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1
              -p 2048 -n 32 -d 32768 -ub "$UB_SIZE"
              -r "$REPS_LONGCTX")

      printf "  cmd: %s\n" "${CMD_LC[*]}"
      if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
        log_success "Long-context test complete"
        tail -5 "$OUT_LC"
      else
        rc=$?
        log_error "Long-context test failed (exit $rc)"
        echo "FAILED" >> "$OUT_LC"
      fi
    else
      log_info "Skipping long-context test (log exists): $OUT_LC"
    fi
  done
done

# ── 5. Parse results into summary JSON ──────────────────
log_info "Parsing results..."
SUMMARY="$RESULT_DIR/summary.json"

python3 - "$RESULT_DIR" > "$SUMMARY" << 'PYEOF'
import sys, os, re, json
from pathlib import Path

result_dir = Path(sys.argv[1])
results = []

for logfile in sorted(result_dir.glob("*.log")):
    content = logfile.read_text()
    if "FAILED" in content:
        # Marker appended by the runner when llama-bench exited non-zero.
        continue

    # Parse the pipe-delimited llama-bench table
    for line in content.splitlines():
        line = line.strip()
        # Skip non-table lines and the header row (parenthesized for clarity:
        # `and` binds tighter than `or`).
        if not line.startswith("|") or ("model" in line.lower() and "size" in line.lower()):
            continue
        if "---" in line:
            continue  # separator row

        parts = [p.strip() for p in line.split("|")]
        if len(parts) < 10:
            continue

        # Columns: | model | size | params | backend | ngl | fa | mmap | test | t/s |
        try:
            test_type = parts[8].strip() if len(parts) > 8 else ""
            ts_raw = parts[9].strip() if len(parts) > 9 else ""
            if not test_type or not ts_raw:
                continue

            # Parse "548.18 +/- 1.59" or just "548.18"
            ts_match = re.match(r'([\d.]+)', ts_raw)
            if not ts_match:
                continue

            results.append({
                "file": logfile.name,
                "model": parts[1].strip(),
                "size": parts[2].strip(),
                "backend": parts[4].strip(),
                "test": test_type,
                "tokens_per_sec": float(ts_match.group(1)),
                "raw": ts_raw,
            })
        except (ValueError, IndexError):
            continue

print(json.dumps({"results": results}, indent=2))
PYEOF

# ── 6. Display summary ──────────────────────────────────
log_header "Baseline Results"

python3 - "$SUMMARY" << 'PYEOF'
import sys, json

with open(sys.argv[1]) as f:
    data = json.load(f)

if not data["results"]:
    print("  No results parsed. Check log files for errors.")
    sys.exit(0)

# Print table
fmt = "  {:<20} {:<16} {:<8} {:>10}"
print(fmt.format("Model", "Backend", "Test", "t/s"))
print("  " + "-" * 58)
for r in data["results"]:
    print(fmt.format(
        r["model"][:20],
        r["backend"][:16],
        r["test"],
        f"{r['tokens_per_sec']:.2f}"
    ))
PYEOF

echo ""
log_success "Baseline saved to: $RESULT_DIR"
log_info "Files: system-state.json, summary.json, metrics.csv, *.log"
log_info "Compare later with: bin/benchmark compare $RESULT_DIR <new-run-dir>"
|
||||
194
scripts/benchmark/run-suite.sh
Normal file
194
scripts/benchmark/run-suite.sh
Normal file
@@ -0,0 +1,194 @@
|
||||
#!/usr/bin/env bash
# Full benchmark suite — run all backends × models with tagging
#
# Usage: run-suite.sh [--tag NAME] [--backends a,b,...] [--models x,y,...]
# Filters are comma-separated substrings matched against toolbox / model names.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"
source "$SCRIPT_DIR/../../lib/format.sh"

MODEL_DIR="$(data_dir models)"
TAG="run"
BACKENDS_FILTER=""
MODELS_FILTER=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    # ${2:?...} aborts with a clear message if the option value is missing.
    --tag|-t) TAG="${2:?missing value for $1}"; shift 2 ;;
    --backends|-b) BACKENDS_FILTER="${2:?missing value for $1}"; shift 2 ;;
    --models|-m) MODELS_FILTER="${2:?missing value for $1}"; shift 2 ;;
    *) shift ;;
  esac
done

# True when name $1 matches the comma-separated filter $2 (empty filter = all).
# Each filter token is treated as a case-insensitive substring of the name.
matches_filter() {
  local name="$1" filter="$2" tok
  [[ -z "$filter" ]] && return 0
  while IFS= read -r tok; do
    [[ -n "$tok" && "${name,,}" == *"${tok,,}"* ]] && return 0
  done < <(tr ',' '\n' <<<"$filter")
  return 1
}

TS="$(timestamp)"
RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
mkdir -p "$RESULT_DIR"

# Repetitions per llama-bench invocation (-r).
REPS_STANDARD=5
REPS_LONGCTX=3

log_header "Benchmark Suite: $TAG"
log_info "Results: $RESULT_DIR"

# Save system state
bash "$SCRIPT_DIR/../audit/system-report.sh" --json > "$RESULT_DIR/system-state.json" 2>/dev/null

# Discover backends
existing="$(detect_toolbox_names 2>/dev/null || true)"

declare -A BENCH_PATHS=(
  [llama-vulkan-radv]="/usr/sbin/llama-bench"
  [llama-vulkan-amdvlk]="/usr/sbin/llama-bench"
  [llama-rocm-6.4.4]="/usr/local/bin/llama-bench"
  [llama-rocm-7.2]="/usr/local/bin/llama-bench"
  [llama-rocm7-nightlies]="/usr/local/bin/llama-bench"
)

available_backends=()
for tb in "${!BENCH_PATHS[@]}"; do
  # -Fx = fixed-string whole-line match (toolbox names contain regex dots).
  if echo "$existing" | grep -Fxq "$tb"; then
    if matches_filter "$tb" "$BACKENDS_FILTER"; then
      available_backends+=("$tb")
    fi
  fi
done

if (( ${#available_backends[@]} == 0 )); then
  log_error "No matching backends. Run: make benchmark-setup"
  exit 1
fi
log_info "Backends: ${available_backends[*]}"

# Find models: first shard of multi-part GGUFs plus all single-file GGUFs.
mapfile -t MODEL_PATHS < <(
  find "$MODEL_DIR" -type f -name '*.gguf' \
    \( -name '*-00001-of-*.gguf' -o -not -name '*-000*-of-*.gguf' \) \
    | sort
)

if [[ -n "$MODELS_FILTER" ]]; then
  filtered=()
  for p in "${MODEL_PATHS[@]}"; do
    # Match filter tokens as substrings of the model filename (the original
    # direction — the filename inside the token — could never match).
    if matches_filter "$(basename "$p")" "$MODELS_FILTER"; then
      filtered+=("$p")
    fi
  done
  # ${filtered[@]+...} guard: empty-array expansion errors under set -u
  # on bash < 4.4; the count check below reports the empty case.
  MODEL_PATHS=(${filtered[@]+"${filtered[@]}"})
fi

if (( ${#MODEL_PATHS[@]} == 0 )); then
  log_error "No models found. Run: make benchmark-setup"
  exit 1
fi
log_info "Models: ${#MODEL_PATHS[@]}"

# Start metric logging
METRICS_FILE="$RESULT_DIR/metrics.csv"
bash "$SCRIPT_DIR/../monitor/log-metrics.sh" --output "$METRICS_FILE" --interval 2 &
METRICS_PID=$!
# `|| true` so a failed kill (logger already gone) cannot abort the trap
# before wait runs.
trap 'kill "$METRICS_PID" 2>/dev/null || true; wait "$METRICS_PID" 2>/dev/null || true' EXIT

# Run benchmarks (same logic as run-baseline.sh)
for MODEL_PATH in "${MODEL_PATHS[@]}"; do
  MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"

  for BACKEND in "${available_backends[@]}"; do
    BENCH_BIN="${BENCH_PATHS[$BACKEND]}"
    BACKEND_SAFE="${BACKEND//[.-]/_}"   # filename-safe backend label

    ENV_ARGS=()
    if [[ "$BACKEND" == *rocm* ]]; then
      ENV_ARGS=(env ROCBLAS_USE_HIPBLASLT=1)
    fi

    # Standard test
    OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log"
    if [[ ! -s "$OUT" ]]; then
      printf '\n%s>> [%s] %s — standard%s\n' "$BOLD" "$BACKEND" "$MODEL_NAME" "$RESET"
      # ${ENV_ARGS[@]+...}: empty-array guard for set -u on bash < 4.4.
      CMD=(toolbox run -c "$BACKEND" -- ${ENV_ARGS[@]+"${ENV_ARGS[@]}"} "$BENCH_BIN"
           -ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1 -r "$REPS_STANDARD")
      if "${CMD[@]}" > "$OUT" 2>&1; then
        log_success "Done"; tail -3 "$OUT"
      else
        log_error "Failed"; echo "FAILED" >> "$OUT"
      fi
    fi

    # Long-context test
    OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
    if [[ ! -s "$OUT_LC" ]]; then
      printf '\n%s>> [%s] %s — longctx%s\n' "$BOLD" "$BACKEND" "$MODEL_NAME" "$RESET"
      # Vulkan backends need a smaller micro-batch at long context.
      UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
      CMD_LC=(toolbox run -c "$BACKEND" -- ${ENV_ARGS[@]+"${ENV_ARGS[@]}"} "$BENCH_BIN"
              -ngl 99 -mmp 0 -m "$MODEL_PATH" -fa 1
              -p 2048 -n 32 -d 32768 -ub "$UB_SIZE" -r "$REPS_LONGCTX")
      if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
        log_success "Done"; tail -3 "$OUT_LC"
      else
        log_error "Failed"; echo "FAILED" >> "$OUT_LC"
      fi
    fi
  done
done

# Parse results
SUMMARY="$RESULT_DIR/summary.json"
# Parse llama-bench log files into summary JSON
python3 - "$RESULT_DIR" > "$SUMMARY" << 'PYEOF'
import sys, os, re, json
from pathlib import Path

result_dir = Path(sys.argv[1])
results = []

for logfile in sorted(result_dir.glob("*.log")):
    content = logfile.read_text()
    if "FAILED" in content:
        # Marker appended by the runner when llama-bench exited non-zero.
        continue
    for line in content.splitlines():
        line = line.strip()
        # Skip non-table lines and the header row (parenthesized: `and`
        # binds tighter than `or`).
        if not line.startswith("|") or ("model" in line.lower() and "size" in line.lower()):
            continue
        if "---" in line:
            continue  # separator row
        parts = [p.strip() for p in line.split("|")]
        if len(parts) < 10:
            continue
        # Columns: | model | size | params | backend | ngl | fa | mmap | test | t/s |
        try:
            test_type = parts[8].strip()
            ts_raw = parts[9].strip()
            # Accept "548.18 +/- 1.59" or plain "548.18".
            ts_match = re.match(r'([\d.]+)', ts_raw)
            if not ts_match:
                continue
            results.append({
                "file": logfile.name,
                "model": parts[1].strip(),
                "size": parts[2].strip(),
                "backend": parts[4].strip(),
                "test": test_type,
                "tokens_per_sec": float(ts_match.group(1)),
                "raw": ts_raw,
            })
        except (ValueError, IndexError):
            continue

print(json.dumps({"results": results}, indent=2))
PYEOF

log_header "Results"
python3 - "$SUMMARY" << 'PYEOF'
import sys, json
with open(sys.argv[1]) as f:
    data = json.load(f)
if not data["results"]:
    print("  No results parsed.")
    sys.exit(0)
fmt = "  {:<20} {:<16} {:<8} {:>10}"
print(fmt.format("Model", "Backend", "Test", "t/s"))
print("  " + "-" * 58)
for r in data["results"]:
    print(fmt.format(r["model"][:20], r["backend"][:16], r["test"], f"{r['tokens_per_sec']:.2f}"))
PYEOF

echo ""
log_success "Results saved to: $RESULT_DIR"
|
||||
106
scripts/benchmark/setup.sh
Normal file
106
scripts/benchmark/setup.sh
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env bash
# Benchmark setup — ensure toolboxes and test models are ready
#
# Checks required/optional toolbox containers, verifies GPU visibility inside
# them, and makes sure at least one GGUF test model is present (offering to
# download one when huggingface-cli is available).
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/detect.sh"

# Local clone of kyuz0/amd-strix-halo-toolboxes, used to create missing
# toolboxes.
TOOLBOXES_REPO="/data/workspace/projects/HomeLab/strix-halo-toolboxes/amd-strix-halo-llamacpp-toolboxes"
MODEL_DIR="$(data_dir models)"

log_header "Benchmark Setup"

# ── 1. Check toolbox containers ──────────────────────────
log_info "Checking toolbox containers..."

# Minimum required: vulkan-radv (most stable)
REQUIRED_TOOLBOXES=("llama-vulkan-radv")
OPTIONAL_TOOLBOXES=("llama-rocm-6.4.4" "llama-rocm-7.2" "llama-vulkan-amdvlk")

existing=$(detect_toolbox_names 2>/dev/null || true)
missing=()

for tb in "${REQUIRED_TOOLBOXES[@]}"; do
  # -Fx = fixed-string whole-line match; toolbox names contain dots, which
  # are regex metacharacters with plain grep.
  if echo "$existing" | grep -Fxq "$tb"; then
    log_success "Toolbox: $tb"
  else
    missing+=("$tb")
    log_warn "Toolbox missing: $tb"
  fi
done

for tb in "${OPTIONAL_TOOLBOXES[@]}"; do
  if echo "$existing" | grep -Fxq "$tb"; then
    log_success "Toolbox: $tb (optional)"
  else
    log_info "Toolbox not present: $tb (optional)"
  fi
done

if (( ${#missing[@]} > 0 )); then
  log_info "Need to create required toolboxes."
  if [[ -d "$TOOLBOXES_REPO" ]]; then
    log_info "Found toolboxes repo at: $TOOLBOXES_REPO"
    if confirm "Create missing toolboxes using refresh-toolboxes.sh?"; then
      for tb in "${missing[@]}"; do
        log_info "Creating $tb..."
        bash "$TOOLBOXES_REPO/refresh-toolboxes.sh" "$tb"
      done
    fi
  else
    log_error "Toolboxes repo not found at: $TOOLBOXES_REPO"
    log_info "Clone it: git clone https://github.com/kyuz0/amd-strix-halo-toolboxes"
    log_info "Then re-run this setup."
    exit 1
  fi
fi

# ── 2. Verify GPU access inside toolboxes ────────────────
log_info "Verifying GPU access in toolboxes..."
for tb in "${REQUIRED_TOOLBOXES[@]}"; do
  # Exact whole-line match, consistent with the checks above (an unanchored
  # substring match could false-positive on a similarly named toolbox).
  if echo "$existing" | grep -Fxq "$tb"; then
    # -E alternation is portable across GNU/BSD grep; BRE `\|` is GNU-only.
    if toolbox run -c "$tb" -- llama-cli --list-devices 2>&1 | grep -qiE 'gpu|vulkan|rocm'; then
      log_success "GPU accessible in $tb"
    else
      log_warn "GPU may not be accessible in $tb — check device mappings"
    fi
  fi
done

# ── 3. Check for test models ────────────────────────────
log_info "Checking for test models in $MODEL_DIR..."

model_count=$(find "$MODEL_DIR" -name "*.gguf" 2>/dev/null | wc -l)
if (( model_count > 0 )); then
  log_success "Found $model_count model(s):"
  find "$MODEL_DIR" -name "*.gguf" | while IFS= read -r f; do
    size=$(du -h "$f" | cut -f1)
    printf "  %s (%s)\n" "$(basename "$f")" "$size"
  done
else
  log_warn "No GGUF models found in $MODEL_DIR"
  log_info "Download a test model. Example:"
  echo ""
  echo "  # Small (4B, ~3 GB):"
  echo "  huggingface-cli download Qwen/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf \\"
  echo "    --local-dir $MODEL_DIR"
  echo ""
  echo "  # Medium (14B, ~9 GB):"
  echo "  huggingface-cli download Qwen/Qwen3-14B-GGUF Qwen3-14B-Q4_K_M.gguf \\"
  echo "    --local-dir $MODEL_DIR"
  echo ""

  if is_cmd huggingface-cli; then
    if confirm "Download Qwen3-4B Q4_K_M (~3 GB) as test model?"; then
      huggingface-cli download Qwen/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf \
        --local-dir "$MODEL_DIR"
      log_success "Model downloaded"
    fi
  else
    log_info "Install huggingface-cli: pip install huggingface_hub[cli]"
  fi
fi

log_header "Setup Complete"
log_info "Run 'make benchmark-baseline' to capture your baseline."
|
||||
Reference in New Issue
Block a user