- Fix missing BATCH_ARGS in long-context commands (both benchmark scripts)
- Fix CLAUDE.md stale venv path (data/venv → .venv) and add serve/power docs
- Add -b/--batch to bin/benchmark help text
- Add --no-think flag to serve script (--reasoning-budget 0)
- Sanitize model names in eval run directories
- Simplify agentic setup to use requirements.txt
- Add serve --help test, batch flag assertions to existing tests
- Add requirements.txt for reproducible venv setup (Python 3.13)
36 lines
1.6 KiB
Bash
Executable File
36 lines
1.6 KiB
Bash
Executable File
#!/usr/bin/env bash
# Benchmark dispatcher
#
# Routes `benchmark <command> [options]` to the matching script under
# scripts/benchmark/. For baseline, run and compare every argument after the
# command name is forwarded unchanged; setup is invoked without arguments.
#
# Exit status:
#   0      usage printed on explicit request (help, -h, --help)
#   1      no command given, or unknown command (usage goes to stderr)
#   other  whatever the exec'd sub-script returns
set -euo pipefail

# Parent of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
readonly SCRIPT_DIR

# Print the usage text on stdout. Error paths redirect it to stderr so that
# piping the output of a failed invocation never captures help text as data.
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'
Usage: benchmark <command> [options]

Commands:
 setup Ensure toolboxes and test models are ready
 baseline Capture pre-optimization baseline
 run Run full benchmark suite
 compare Compare two runs (DIR1 DIR2)

Filtering options (baseline and run):
 --max-size GB Only models up to this file size
 --category LIST Comma-separated: smoke,dense,moe
 --skip-longctx Skip long-context (32K) tests
 --reps N Standard test repetitions (default: 5)
 -b, --batch N Batch size (default: 2048, try 256 for MoE)
 --kv-types LIST KV cache sweep (e.g. f16,q8_0,q4_0 or q4_0:q8_0)

Examples:
 benchmark baseline --max-size 20 --skip-longctx
 benchmark run --tag post-opt --category moe
 benchmark run --tag kv-sweep --kv-types f16,q8_0,q4_0 --context 131072
EOF
}

# `exec` replaces this shell, so the sub-script's exit status becomes ours.
case "${1:-}" in
  setup)
    exec bash "$SCRIPT_DIR/scripts/benchmark/setup.sh"
    ;;
  baseline)
    exec bash "$SCRIPT_DIR/scripts/benchmark/run-baseline.sh" "${@:2}"
    ;;
  run)
    exec bash "$SCRIPT_DIR/scripts/benchmark/run-suite.sh" "${@:2}"
    ;;
  compare)
    exec bash "$SCRIPT_DIR/scripts/benchmark/compare.sh" "${@:2}"
    ;;
  help | -h | --help)
    # Asking for help is not an error.
    usage
    exit 0
    ;;
  "")
    # No command at all: show usage, but still signal failure to callers.
    usage >&2
    exit 1
    ;;
  *)
    # Name the offending command so typos are easy to spot.
    printf 'benchmark: unknown command: %s\n' "$1" >&2
    usage >&2
    exit 1
    ;;
esac