feat(benchmark): add -b/--batch flag, test MoE batch size impact

Add a batch size override (-b/--batch) to the benchmark scripts. Testing -b 256 against the default of 2048 on Vulkan RADV shows no meaningful difference for MoE pp2048 (826 vs 843 t/s, within noise). The community-reported +70% improvement does not reproduce on this backend.
2026-03-30 20:01:24 +02:00
parent ea70687cd2
commit ba24091791
3 changed files with 36 additions and 11 deletions
--- a/scripts/benchmark/run-suite.sh
+++ b/scripts/benchmark/run-suite.sh
@@ -20,6 +20,7 @@ CTX_DEPTH=32768
 CTX_PROMPT=2048
 PP_TOKENS=512
 TG_TOKENS=128
+BATCH_SIZE=""    # Batch size override (-b flag, empty = llama-bench default 2048)
 KV_TYPES_RAW=""  # Comma-separated KV cache types to sweep (e.g. f16,q8_0,q4_0 or q4_0:q8_0)

 while [[ $# -gt 0 ]]; do
@@ -34,6 +35,7 @@ while [[ $# -gt 0 ]]; do
        --context|-d)    CTX_DEPTH="$2"; shift 2 ;;
        --pp)            PP_TOKENS="$2"; shift 2 ;;
        --tg)            TG_TOKENS="$2"; shift 2 ;;
+        -b|--batch)      BATCH_SIZE="$2"; shift 2 ;;
        --kv-types)      KV_TYPES_RAW="$2"; shift 2 ;;
        --help|-h)
            echo "Usage: run-suite.sh [OPTIONS]"
@@ -49,6 +51,7 @@ while [[ $# -gt 0 ]]; do
            echo "  --context N          Long-context depth in tokens (default: 32768)"
            echo "  --pp N               Prompt processing tokens (default: 512)"
            echo "  --tg N               Token generation count (default: 128)"
+            echo "  -b, --batch N        Batch size (default: 2048, try 256 for MoE)"
            echo "  --kv-types LIST      KV cache sweep: comma-separated types to test"
            echo "                       Each entry: TYPE (both K+V) or K_TYPE:V_TYPE"
            echo "                       Types: f16, q8_0, q4_0, q4_1"
@@ -213,16 +216,24 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
                KV_SUFFIX="__kv_${KV_K}-${KV_V}"
            fi

+            # Build batch size args
+            BATCH_ARGS=()
+            BATCH_SUFFIX=""
+            if [[ -n "$BATCH_SIZE" ]]; then
+                BATCH_ARGS+=(-b "$BATCH_SIZE")
+                BATCH_SUFFIX="__b${BATCH_SIZE}"
+            fi
+
            # Standard test
            local_suffix="fa1"
            [[ "$PP_TOKENS" != "512" || "$TG_TOKENS" != "128" ]] && local_suffix="fa1__pp${PP_TOKENS}_tg${TG_TOKENS}"
-            OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}${KV_SUFFIX}.log"
+            OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}${KV_SUFFIX}${BATCH_SUFFIX}.log"
            if [[ ! -s "$OUT" ]]; then
                printf "\n${BOLD}>> [%s] %s — pp%s/tg%s  KV=%s${RESET}\n" \
                    "$BACKEND" "$MODEL_NAME" "$PP_TOKENS" "$TG_TOKENS" "${KV_K}/${KV_V}"
                CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
                    -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
-                    -p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD" "${KV_ARGS[@]}")
+                    -p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD" "${BATCH_ARGS[@]}" "${KV_ARGS[@]}")
                if "${CMD[@]}" > "$OUT" 2>&1; then
                    log_success "Done"; tail -3 "$OUT"
                else
@@ -234,7 +245,7 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
            if $SKIP_LONGCTX; then
                continue
            fi
-            OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}${KV_SUFFIX}.log"
+            OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}${KV_SUFFIX}${BATCH_SUFFIX}.log"
            if [[ ! -s "$OUT_LC" ]]; then
                printf "\n${BOLD}>> [%s] %s — longctx %s  KV=%s${RESET}\n" \
                    "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH" "${KV_K}/${KV_V}"