feat: add --pp and --tg flags for realistic benchmark workloads
Standard benchmarks use pp512/tg128, which underestimates real-world agentic coding workloads, where responses are 500-2000 tokens.

Now configurable:
  --pp N    Prompt processing tokens (default: 512)
  --tg N    Token generation count (default: 128)

Examples:
  benchmark run --tag realistic --tg 1024 --pp 2048 --category moe
  benchmark run --tag full-response --tg 2048 --category moe --reps 3

Log filenames include pp/tg when non-default (e.g., model__backend__fa1__pp2048_tg1024.log).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,8 @@ MAX_SIZE_GB=0 # 0 = no limit
|
||||
CATEGORY_FILTER=""
|
||||
CTX_DEPTH=32768
|
||||
CTX_PROMPT=2048
|
||||
PP_TOKENS=512
|
||||
TG_TOKENS=128
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
@@ -27,6 +29,8 @@ while [[ $# -gt 0 ]]; do
|
||||
--category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
|
||||
--reps|-r) REPS_STANDARD="$2"; shift 2 ;;
|
||||
--context|-d) CTX_DEPTH="$2"; shift 2 ;;
|
||||
--pp) PP_TOKENS="$2"; shift 2 ;;
|
||||
--tg) TG_TOKENS="$2"; shift 2 ;;
|
||||
--help|-h)
|
||||
echo "Usage: run-baseline.sh [OPTIONS]"
|
||||
echo ""
|
||||
@@ -36,10 +40,13 @@ while [[ $# -gt 0 ]]; do
|
||||
echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
|
||||
echo " --reps N Standard test repetitions (default: 5)"
|
||||
echo " --context N Long-context depth in tokens (default: 32768)"
|
||||
echo " --pp N Prompt processing tokens (default: 512)"
|
||||
echo " --tg N Token generation count (default: 128)"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " run-baseline.sh --max-size 20 # Only models ≤20 GB"
|
||||
echo " run-baseline.sh --context 131072 --category moe # 128K context on MoE"
|
||||
echo " run-baseline.sh --tg 1024 --pp 2048 --category moe # Realistic agentic"
|
||||
echo " run-baseline.sh --skip-longctx --max-size 15 # Quick safe run"
|
||||
exit 0 ;;
|
||||
*) log_warn "Unknown argument: $1"; shift ;;
|
||||
@@ -182,12 +189,15 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
|
||||
TOOLBOX_MODEL_PATH="/run/host${TOOLBOX_MODEL_PATH}"
|
||||
fi
|
||||
|
||||
# Standard test (pp512 + tg128, default context)
|
||||
OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log"
|
||||
# Standard test
|
||||
local_suffix="fa1"
|
||||
[[ "$PP_TOKENS" != "512" || "$TG_TOKENS" != "128" ]] && local_suffix="fa1__pp${PP_TOKENS}_tg${TG_TOKENS}"
|
||||
OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}.log"
|
||||
if [[ ! -s "$OUT" ]]; then
|
||||
printf "\n${BOLD}>> [%s] %s — standard test${RESET}\n" "$BACKEND" "$MODEL_NAME"
|
||||
printf "\n${BOLD}>> [%s] %s — pp%s/tg%s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$PP_TOKENS" "$TG_TOKENS"
|
||||
CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
|
||||
-ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1 -r "$REPS_STANDARD")
|
||||
-ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
|
||||
-p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD")
|
||||
|
||||
printf " cmd: %s\n" "${CMD[*]}"
|
||||
if "${CMD[@]}" > "$OUT" 2>&1; then
|
||||
|
||||
Reference in New Issue
Block a user