diff --git a/bin/benchmark b/bin/benchmark
index 4bdc124..6df7533 100755
--- a/bin/benchmark
+++ b/bin/benchmark
@@ -23,10 +23,12 @@ case "${1:-help}" in
         echo "  --category LIST     Comma-separated: smoke,dense,moe"
         echo "  --skip-longctx      Skip long-context (32K) tests"
         echo "  --reps N            Standard test repetitions (default: 5)"
+        echo "  --kv-types LIST     KV cache sweep (e.g. f16,q8_0,q4_0 or q4_0:q8_0)"
         echo ""
         echo "Examples:"
         echo "  benchmark baseline --max-size 20 --skip-longctx"
         echo "  benchmark run --tag post-opt --category moe"
+        echo "  benchmark run --tag kv-sweep --kv-types f16,q8_0,q4_0 --context 131072"
         exit 1
         ;;
 esac
diff --git a/scripts/benchmark/run-baseline.sh b/scripts/benchmark/run-baseline.sh
index c5104be..700766b 100644
--- a/scripts/benchmark/run-baseline.sh
+++ b/scripts/benchmark/run-baseline.sh
@@ -21,6 +21,7 @@ CTX_DEPTH=32768
 CTX_PROMPT=2048
 PP_TOKENS=512
 TG_TOKENS=128
+KV_TYPES_RAW=""  # Comma-separated KV cache types to sweep (e.g. f16,q8_0,q4_0 or q4_0:q8_0)
 
 while [[ $# -gt 0 ]]; do
     case "$1" in
@@ -31,6 +32,7 @@ while [[ $# -gt 0 ]]; do
         --context|-d)    CTX_DEPTH="$2"; shift 2 ;;
         --pp)            PP_TOKENS="$2"; shift 2 ;;
         --tg)            TG_TOKENS="$2"; shift 2 ;;
+        --kv-types)      KV_TYPES_RAW="$2"; shift 2 ;;
         --help|-h)
             echo "Usage: run-baseline.sh [OPTIONS]"
             echo ""
@@ -42,11 +44,15 @@ while [[ $# -gt 0 ]]; do
             echo "  --context N          Long-context depth in tokens (default: 32768)"
             echo "  --pp N               Prompt processing tokens (default: 512)"
             echo "  --tg N               Token generation count (default: 128)"
+            echo "  --kv-types LIST      KV cache sweep: comma-separated types to test"
+            echo "                       Each entry: TYPE (both K+V) or K_TYPE:V_TYPE"
+            echo "                       Types: f16, q8_0, q4_0, q4_1"
             echo ""
             echo "Examples:"
             echo "  run-baseline.sh --max-size 20               # Only models ≤20 GB"
             echo "  run-baseline.sh --context 131072 --category moe  # 128K context on MoE"
             echo "  run-baseline.sh --tg 1024 --pp 2048 --category moe  # Realistic agentic"
+            echo "  run-baseline.sh --kv-types f16,q8_0,q4_0 --context 131072  # KV sweep"
             echo "  run-baseline.sh --skip-longctx --max-size 15 # Quick safe run"
             exit 0 ;;
         *) log_warn "Unknown argument: $1"; shift ;;
@@ -59,11 +65,19 @@ if (( CTX_DEPTH > 32768 )); then
     (( CTX_PROMPT < 512 )) && CTX_PROMPT=512
 fi
 
+# Parse KV cache types for sweep: comma-separated, stray spaces tolerated ("f16, q8_0")
+KV_TYPES=()
+if [[ -n "$KV_TYPES_RAW" ]]; then
+    IFS=', ' read -ra KV_TYPES <<< "$KV_TYPES_RAW"
+fi
+(( ${#KV_TYPES[@]} > 0 )) || KV_TYPES=("f16")  # nothing usable given → default f16 cache only
+
 log_header "Baseline Benchmark Capture"
 log_info "Results will be saved to: $RESULT_DIR"
 $SKIP_LONGCTX && log_info "Long-context tests: SKIPPED"
 (( MAX_SIZE_GB > 0 )) && log_info "Max model size: ${MAX_SIZE_GB} GB"
 [[ -n "$CATEGORY_FILTER" ]] && log_info "Categories: $CATEGORY_FILTER"
+(( ${#KV_TYPES[@]} > 1 )) && log_info "KV cache sweep: ${KV_TYPES[*]}"
 
 # ── 1. Save system state ────────────────────────────────
 log_info "Capturing system state..."
@@ -165,9 +179,8 @@ log_info "Metric logger started (PID: $METRICS_PID)"
 cleanup() {
     kill "$METRICS_PID" 2>/dev/null || true
     wait "$METRICS_PID" 2>/dev/null || true
-    return 0
 }
-trap cleanup EXIT
+trap cleanup EXIT  # no "exit 0" here: it would report failures and Ctrl-C as success
 
 # ── 5. Run benchmarks ───────────────────────────────────
 for MODEL_PATH in "${MODEL_PATHS[@]}"; do
@@ -189,56 +202,77 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
             TOOLBOX_MODEL_PATH="/run/host${TOOLBOX_MODEL_PATH}"
         fi
 
-        # Standard test
-        local_suffix="fa1"
-        [[ "$PP_TOKENS" != "512" || "$TG_TOKENS" != "128" ]] && local_suffix="fa1__pp${PP_TOKENS}_tg${TG_TOKENS}"
-        OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}.log"
-        if [[ ! -s "$OUT" ]]; then
-            printf "\n${BOLD}>> [%s] %s — pp%s/tg%s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$PP_TOKENS" "$TG_TOKENS"
-            CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
-                -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
-                -p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD")
-
-            printf "  cmd: %s\n" "${CMD[*]}"
-            if "${CMD[@]}" > "$OUT" 2>&1; then
-                log_success "Standard test complete"
-                tail -5 "$OUT"
+        for KV_SPEC in "${KV_TYPES[@]}"; do
+            # Parse KV spec: "q8_0" → K=q8_0,V=q8_0 or "q4_0:q8_0" → K=q4_0,V=q8_0
+            if [[ "$KV_SPEC" == *:* ]]; then
+                KV_K="${KV_SPEC%%:*}"
+                KV_V="${KV_SPEC##*:}"
             else
-                log_error "Standard test failed (exit $?)"
-                echo "FAILED" >> "$OUT"
+                KV_K="$KV_SPEC"
+                KV_V="$KV_SPEC"
             fi
-        else
-            log_info "Skipping standard test (log exists): $OUT"
-        fi
 
-        # Long-context test (pp2048, tg32, ctx 32768)
-        if $SKIP_LONGCTX; then
-            continue
-        fi
+            # Build KV cache args (skip for f16 — it's the default)
+            KV_ARGS=()
+            KV_SUFFIX=""
+            if [[ "$KV_K" != "f16" || "$KV_V" != "f16" ]]; then
+                KV_ARGS+=(-ctk "$KV_K" -ctv "$KV_V")
+                KV_SUFFIX="__kv_${KV_K}_${KV_V}"
+            fi
 
-        OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}.log"
-        if [[ ! -s "$OUT_LC" ]]; then
-            printf "\n${BOLD}>> [%s] %s — long-context %s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH"
+            # Standard test
+            local_suffix="fa1"
+            [[ "$PP_TOKENS" != "512" || "$TG_TOKENS" != "128" ]] && local_suffix="fa1__pp${PP_TOKENS}_tg${TG_TOKENS}"
+            OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}${KV_SUFFIX}.log"
+            if [[ ! -s "$OUT" ]]; then
+                printf "\n${BOLD}>> [%s] %s — pp%s/tg%s  KV=%s${RESET}\n" \
+                    "$BACKEND" "$MODEL_NAME" "$PP_TOKENS" "$TG_TOKENS" "${KV_K}/${KV_V}"
+                CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
+                    -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
+                    -p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD" "${KV_ARGS[@]}")
 
-            UB_SIZE=2048
-            [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
-
-            CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
-                -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
-                -p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE"
-                -r "$REPS_LONGCTX")
-
-            printf "  cmd: %s\n" "${CMD_LC[*]}"
-            if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
-                log_success "Long-context test complete"
-                tail -5 "$OUT_LC"
+                printf "  cmd: %s\n" "${CMD[*]}"
+                if "${CMD[@]}" > "$OUT" 2>&1; then
+                    log_success "Standard test complete"
+                    tail -5 "$OUT"
+                else
+                    log_error "Standard test failed (exit $?)"
+                    echo "FAILED" >> "$OUT"
+                fi
             else
-                log_error "Long-context test failed (exit $?)"
-                echo "FAILED" >> "$OUT_LC"
+                log_info "Skipping standard test (log exists): $OUT"
             fi
-        else
-            log_info "Skipping long-context test (log exists): $OUT_LC"
-        fi
+
+            # Long-context test
+            if $SKIP_LONGCTX; then
+                continue
+            fi
+
+            OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}${KV_SUFFIX}.log"
+            if [[ ! -s "$OUT_LC" ]]; then
+                printf "\n${BOLD}>> [%s] %s — long-context %s  KV=%s${RESET}\n" \
+                    "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH" "${KV_K}/${KV_V}"
+
+                UB_SIZE=2048
+                [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
+
+                CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
+                    -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
+                    -p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE"
+                    -r "$REPS_LONGCTX" "${KV_ARGS[@]}")
+
+                printf "  cmd: %s\n" "${CMD_LC[*]}"
+                if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
+                    log_success "Long-context test complete"
+                    tail -5 "$OUT_LC"
+                else
+                    log_error "Long-context test failed (exit $?)"
+                    echo "FAILED" >> "$OUT_LC"
+                fi
+            else
+                log_info "Skipping long-context test (log exists): $OUT_LC"
+            fi
+        done  # KV_TYPES
     done
 done
 
@@ -258,6 +292,10 @@ for logfile in sorted(result_dir.glob("*.log")):
     if "FAILED" in content:
         continue
 
+    # KV types from the filename suffix; a type is letters+digits plus an optional _part, so __kv_q8_0_q8_0 → q8_0/q8_0
+    kv_match = re.search(r'__kv_([a-z]+[0-9]+(?:_[a-z0-9]+)?)_([a-z]+[0-9]+(?:_[a-z0-9]+)?)\.log$', logfile.name)
+    kv_type = f"{kv_match.group(1)}/{kv_match.group(2)}" if kv_match else "f16/f16"
+
     for line in content.splitlines():
         line = line.strip()
         if not line.startswith("|") or ("model" in line.lower() and "size" in line.lower()):
@@ -286,6 +324,7 @@ for logfile in sorted(result_dir.glob("*.log")):
                 "backend": parts[4].strip(),
                 "test": test_type,
                 "tokens_per_sec": float(ts_match.group(1)),
+                "kv_cache": kv_type,
                 "raw": ts_raw,
             })
         except (ValueError, IndexError):
@@ -307,13 +346,14 @@ if not data["results"]:
     print("  No results parsed. Check log files for errors.")
     sys.exit(0)
 
-fmt = "  {:<20} {:<16} {:<8} {:>10}"
-print(fmt.format("Model", "Backend", "Test", "t/s"))
-print("  " + "-" * 58)
+fmt = "  {:<20} {:<16} {:<10} {:<8} {:>10}"
+print(fmt.format("Model", "Backend", "KV cache", "Test", "t/s"))
+print("  " + "-" * 68)
 for r in data["results"]:
     print(fmt.format(
         r["model"][:20],
         r["backend"][:16],
+        r.get("kv_cache", "f16/f16")[:10],
         r["test"],
         f"{r['tokens_per_sec']:.2f}"
     ))
diff --git a/scripts/benchmark/run-suite.sh b/scripts/benchmark/run-suite.sh
index 7049298..e342698 100644
--- a/scripts/benchmark/run-suite.sh
+++ b/scripts/benchmark/run-suite.sh
@@ -20,6 +20,7 @@ CTX_DEPTH=32768
 CTX_PROMPT=2048
 PP_TOKENS=512
 TG_TOKENS=128
+KV_TYPES_RAW=""  # Comma-separated KV cache types to sweep (e.g. f16,q8_0,q4_0 or q4_0:q8_0)
 
 while [[ $# -gt 0 ]]; do
     case "$1" in
@@ -33,6 +34,7 @@ while [[ $# -gt 0 ]]; do
         --context|-d)    CTX_DEPTH="$2"; shift 2 ;;
         --pp)            PP_TOKENS="$2"; shift 2 ;;
         --tg)            TG_TOKENS="$2"; shift 2 ;;
+        --kv-types)      KV_TYPES_RAW="$2"; shift 2 ;;
         --help|-h)
             echo "Usage: run-suite.sh [OPTIONS]"
             echo ""
@@ -47,10 +49,15 @@ while [[ $# -gt 0 ]]; do
             echo "  --context N          Long-context depth in tokens (default: 32768)"
             echo "  --pp N               Prompt processing tokens (default: 512)"
             echo "  --tg N               Token generation count (default: 128)"
+            echo "  --kv-types LIST      KV cache sweep: comma-separated types to test"
+            echo "                       Each entry: TYPE (both K+V) or K_TYPE:V_TYPE"
+            echo "                       Types: f16, q8_0, q4_0, q4_1"
             echo ""
             echo "Examples:"
             echo "  run-suite.sh --tag ctx128k --context 131072 --category moe"
             echo "  run-suite.sh --tag realistic --tg 1024 --pp 2048 --category moe"
+            echo "  run-suite.sh --tag kv-sweep --kv-types f16,q8_0,q4_0 --context 131072"
+            echo "  run-suite.sh --tag kv-mixed --kv-types q8_0,q4_0:q8_0 --context 131072"
             echo "  run-suite.sh --tag post-opt --max-size 20 --skip-longctx"
             exit 0 ;;
         *) log_warn "Unknown argument: $1"; shift ;;
@@ -63,12 +70,20 @@ if (( CTX_DEPTH > 32768 )); then
     (( CTX_PROMPT < 512 )) && CTX_PROMPT=512
 fi
 
+# Parse KV cache types for sweep: comma-separated, stray spaces tolerated ("f16, q8_0")
+KV_TYPES=()
+if [[ -n "$KV_TYPES_RAW" ]]; then
+    IFS=', ' read -ra KV_TYPES <<< "$KV_TYPES_RAW"
+fi
+(( ${#KV_TYPES[@]} > 0 )) || KV_TYPES=("f16")  # nothing usable given → default f16 cache only
+
 TS="$(timestamp)"
 RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
 mkdir -p "$RESULT_DIR"
 
 log_header "Benchmark Suite: $TAG"
 log_info "Results: $RESULT_DIR"
+(( ${#KV_TYPES[@]} > 1 )) && log_info "KV cache sweep: ${KV_TYPES[*]}"
 
 # Save system state
 bash "$SCRIPT_DIR/../audit/system-report.sh" --json > "$RESULT_DIR/system-state.json" 2>/dev/null
@@ -157,7 +172,11 @@ log_info "Models: ${#MODEL_PATHS[@]}"
 METRICS_FILE="$RESULT_DIR/metrics.csv"
 bash "$SCRIPT_DIR/../monitor/log-metrics.sh" --output "$METRICS_FILE" --interval 2 &
 METRICS_PID=$!
-trap 'kill "$METRICS_PID" 2>/dev/null; wait "$METRICS_PID" 2>/dev/null; true' EXIT
+cleanup() {
+    kill "$METRICS_PID" 2>/dev/null || true  # stop the background metrics logger; ignore errors from kill
+    wait "$METRICS_PID" 2>/dev/null || true  # reap it; its exit status is not of interest
+}
+trap cleanup EXIT  # no "exit 0" here: it would report failures and Ctrl-C as success
 
 # Run benchmarks (same logic as run-baseline.sh)
 for MODEL_PATH in "${MODEL_PATHS[@]}"; do
@@ -176,39 +195,60 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
             TOOLBOX_MODEL_PATH="/run/host${TOOLBOX_MODEL_PATH}"
         fi
 
-        # Standard test
-        local_suffix="fa1"
-        [[ "$PP_TOKENS" != "512" || "$TG_TOKENS" != "128" ]] && local_suffix="fa1__pp${PP_TOKENS}_tg${TG_TOKENS}"
-        OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}.log"
-        if [[ ! -s "$OUT" ]]; then
-            printf "\n${BOLD}>> [%s] %s — pp%s/tg%s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$PP_TOKENS" "$TG_TOKENS"
-            CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
-                -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
-                -p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD")
-            if "${CMD[@]}" > "$OUT" 2>&1; then
-                log_success "Done"; tail -3 "$OUT"
+        for KV_SPEC in "${KV_TYPES[@]}"; do
+            # Parse KV spec: "q8_0" → K=q8_0,V=q8_0 or "q4_0:q8_0" → K=q4_0,V=q8_0
+            if [[ "$KV_SPEC" == *:* ]]; then
+                KV_K="${KV_SPEC%%:*}"
+                KV_V="${KV_SPEC##*:}"
             else
-                log_error "Failed"; echo "FAILED" >> "$OUT"
+                KV_K="$KV_SPEC"
+                KV_V="$KV_SPEC"
             fi
-        fi
 
-        # Long-context test
-        if $SKIP_LONGCTX; then
-            continue
-        fi
-        OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}.log"
-        if [[ ! -s "$OUT_LC" ]]; then
-            printf "\n${BOLD}>> [%s] %s — longctx %s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH"
-            UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
-            CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
-                -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
-                -p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE" -r "$REPS_LONGCTX")
-            if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
-                log_success "Done"; tail -3 "$OUT_LC"
-            else
-                log_error "Failed"; echo "FAILED" >> "$OUT_LC"
+            # Build KV cache args (skip for f16 — it's the default)
+            KV_ARGS=()
+            KV_SUFFIX=""
+            if [[ "$KV_K" != "f16" || "$KV_V" != "f16" ]]; then
+                KV_ARGS+=(-ctk "$KV_K" -ctv "$KV_V")
+                KV_SUFFIX="__kv_${KV_K}_${KV_V}"
             fi
-        fi
+
+            # Standard test
+            local_suffix="fa1"
+            [[ "$PP_TOKENS" != "512" || "$TG_TOKENS" != "128" ]] && local_suffix="fa1__pp${PP_TOKENS}_tg${TG_TOKENS}"
+            OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}${KV_SUFFIX}.log"
+            if [[ ! -s "$OUT" ]]; then
+                printf "\n${BOLD}>> [%s] %s — pp%s/tg%s  KV=%s${RESET}\n" \
+                    "$BACKEND" "$MODEL_NAME" "$PP_TOKENS" "$TG_TOKENS" "${KV_K}/${KV_V}"
+                CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
+                    -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
+                    -p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD" "${KV_ARGS[@]}")
+                if "${CMD[@]}" > "$OUT" 2>&1; then
+                    log_success "Done"; tail -3 "$OUT"
+                else
+                    log_error "Failed"; echo "FAILED" >> "$OUT"
+                fi
+            fi
+
+            # Long-context test
+            if $SKIP_LONGCTX; then
+                continue
+            fi
+            OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}${KV_SUFFIX}.log"
+            if [[ ! -s "$OUT_LC" ]]; then
+                printf "\n${BOLD}>> [%s] %s — longctx %s  KV=%s${RESET}\n" \
+                    "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH" "${KV_K}/${KV_V}"
+                UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
+                CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
+                    -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
+                    -p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE" -r "$REPS_LONGCTX" "${KV_ARGS[@]}")
+                if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
+                    log_success "Done"; tail -3 "$OUT_LC"
+                else
+                    log_error "Failed"; echo "FAILED" >> "$OUT_LC"
+                fi
+            fi
+        done  # KV_TYPES
     done
 done
 
@@ -226,6 +266,11 @@ for logfile in sorted(result_dir.glob("*.log")):
     content = logfile.read_text()
     if "FAILED" in content:
         continue
+
+    # KV types from the filename suffix; a type is letters+digits plus an optional _part, so __kv_q8_0_q8_0 → q8_0/q8_0
+    kv_match = re.search(r'__kv_([a-z]+[0-9]+(?:_[a-z0-9]+)?)_([a-z]+[0-9]+(?:_[a-z0-9]+)?)\.log$', logfile.name)
+    kv_type = f"{kv_match.group(1)}/{kv_match.group(2)}" if kv_match else "f16/f16"
+
     for line in content.splitlines():
         line = line.strip()
         if not line.startswith("|") or ("model" in line.lower() and "size" in line.lower()):
@@ -248,6 +293,7 @@ for logfile in sorted(result_dir.glob("*.log")):
                 "backend": parts[4].strip(),
                 "test": test_type,
                 "tokens_per_sec": float(ts_match.group(1)),
+                "kv_cache": kv_type,
                 "raw": ts_raw,
             })
         except (ValueError, IndexError):
@@ -264,11 +310,14 @@ with open(sys.argv[1]) as f:
 if not data["results"]:
     print("  No results parsed.")
     sys.exit(0)
-fmt = "  {:<20} {:<16} {:<8} {:>10}"
-print(fmt.format("Model", "Backend", "Test", "t/s"))
-print("  " + "-" * 58)
+fmt = "  {:<20} {:<16} {:<10} {:<8} {:>10}"
+print(fmt.format("Model", "Backend", "KV cache", "Test", "t/s"))
+print("  " + "-" * 68)
 for r in data["results"]:
-    print(fmt.format(r["model"][:20], r["backend"][:16], r["test"], f"{r['tokens_per_sec']:.2f}"))
+    print(fmt.format(
+        r["model"][:20], r["backend"][:16],
+        r.get("kv_cache", "f16/f16")[:10], r["test"],
+        f"{r['tokens_per_sec']:.2f}"))
 PYEOF
 
 echo ""
diff --git a/tests/benchmark_flags.bats b/tests/benchmark_flags.bats
index 923a363..9bc0f2d 100644
--- a/tests/benchmark_flags.bats
+++ b/tests/benchmark_flags.bats
@@ -10,6 +10,7 @@ load test_helper.sh
     assert_output --partial "--max-size"
     assert_output --partial "--category"
     assert_output --partial "--skip-longctx"
+    assert_output --partial "--kv-types"
 }
 
 @test "run-suite --help shows usage and exits 0" {
@@ -20,6 +21,7 @@ load test_helper.sh
     assert_output --partial "--category"
     assert_output --partial "--skip-longctx"
     assert_output --partial "--tag"
+    assert_output --partial "--kv-types"
 }
 
 @test "benchmark dispatcher shows help with no args" {
@@ -28,6 +30,7 @@ load test_helper.sh
     assert_output --partial "Commands"
     assert_output --partial "--max-size"
     assert_output --partial "--skip-longctx"
+    assert_output --partial "--kv-types"
 }
 
 @test "benchmark dispatcher passes --help through to baseline" {