feat(benchmark): add --kv-types flag for KV cache quantization sweep

This commit is contained in:
Felipe Cardoso
2026-03-27 12:29:19 +01:00
parent 38daf953bf
commit 7531f6fa74
4 changed files with 176 additions and 82 deletions

View File

@@ -23,10 +23,12 @@ case "${1:-help}" in
echo " --category LIST Comma-separated: smoke,dense,moe"
echo " --skip-longctx Skip long-context (32K) tests"
echo " --reps N Standard test repetitions (default: 5)"
echo " --kv-types LIST KV cache sweep (e.g. f16,q8_0,q4_0 or q4_0:q8_0)"
echo ""
echo "Examples:"
echo " benchmark baseline --max-size 20 --skip-longctx"
echo " benchmark run --tag post-opt --category moe"
echo " benchmark run --tag kv-sweep --kv-types f16,q8_0,q4_0 --context 131072"
exit 1
;;
esac