feat(benchmark): add --kv-types flag for KV cache quantization sweep
This commit is contained in:
@@ -23,10 +23,12 @@ case "${1:-help}" in
|
||||
echo " --category LIST Comma-separated: smoke,dense,moe"
|
||||
echo " --skip-longctx Skip long-context (32K) tests"
|
||||
echo " --reps N Standard test repetitions (default: 5)"
|
||||
echo " --kv-types LIST KV cache sweep (e.g. f16,q8_0,q4_0 or q4_0:q8_0)"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " benchmark baseline --max-size 20 --skip-longctx"
|
||||
echo " benchmark run --tag post-opt --category moe"
|
||||
echo " benchmark run --tag kv-sweep --kv-types f16,q8_0,q4_0 --context 131072"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
Reference in New Issue
Block a user