# Patch summary (commentary placed ahead of the first "diff --git" header).
#
# Files touched: scripts/benchmark/run-baseline.sh and
# scripts/benchmark/run-suite.sh. Both receive the same change:
#
#   * New defaults PP_TOKENS=512 and TG_TOKENS=128 next to CTX_DEPTH/CTX_PROMPT.
#   * New options --pp N and --tg N in the argument-parsing case statement;
#     each stores "$2" and then does "shift 2".
#   * --help output gains one line per new option plus one example invocation.
#   * Standard test: the log file suffix stays "fa1" when both values equal
#     "512" and "128"; otherwise it becomes "fa1__pp<PP>_tg<TG>", so runs with
#     non-default sizes get their own log file and do not hit the
#     '[[ ! -s "$OUT" ]]' skip check of a default run.
#   * The benchmark command array now passes -p "$PP_TOKENS" -n "$TG_TOKENS"
#     ahead of -r "$REPS_STANDARD", and the progress banner prints the pp/tg
#     values instead of the fixed "standard test" / "standard" text.
#
# NOTE(review): --pp/--tg read "$2" and "shift 2" without checking that a
#   value was supplied (same pattern as the neighbouring options shown as
#   context); no validation of the values is visible in this patch, and they
#   are used verbatim in the log file name and on the benchmark command line.
# NOTE(review): the default check is a string comparison against "512"/"128",
#   so a numerically equal spelling such as "0512" would be treated as
#   non-default.
# NOTE(review): "local_suffix" is assigned as an ordinary variable (no `local`
#   keyword), despite its name.
# NOTE(review): in this copy the diff's line breaks appear to have been
#   collapsed into spaces (headers, hunk markers and +/- lines share physical
#   lines); presumably the line structure must be restored before the patch
#   can be applied -- confirm against the original commit.
diff --git a/scripts/benchmark/run-baseline.sh b/scripts/benchmark/run-baseline.sh index ec6bbda..c5104be 100644 --- a/scripts/benchmark/run-baseline.sh +++ b/scripts/benchmark/run-baseline.sh @@ -19,6 +19,8 @@ MAX_SIZE_GB=0 # 0 = no limit CATEGORY_FILTER="" CTX_DEPTH=32768 CTX_PROMPT=2048 +PP_TOKENS=512 +TG_TOKENS=128 while [[ $# -gt 0 ]]; do case "$1" in @@ -27,6 +29,8 @@ while [[ $# -gt 0 ]]; do --category|-c) CATEGORY_FILTER="$2"; shift 2 ;; --reps|-r) REPS_STANDARD="$2"; shift 2 ;; --context|-d) CTX_DEPTH="$2"; shift 2 ;; + --pp) PP_TOKENS="$2"; shift 2 ;; + --tg) TG_TOKENS="$2"; shift 2 ;; --help|-h) echo "Usage: run-baseline.sh [OPTIONS]" echo "" @@ -36,10 +40,13 @@ while [[ $# -gt 0 ]]; do echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)" echo " --reps N Standard test repetitions (default: 5)" echo " --context N Long-context depth in tokens (default: 32768)" + echo " --pp N Prompt processing tokens (default: 512)" + echo " --tg N Token generation count (default: 128)" echo "" echo "Examples:" echo " run-baseline.sh --max-size 20 # Only models ≤20 GB" echo " run-baseline.sh --context 131072 --category moe # 128K context on MoE" + echo " run-baseline.sh --tg 1024 --pp 2048 --category moe # Realistic agentic" echo " run-baseline.sh --skip-longctx --max-size 15 # Quick safe run" exit 0 ;; *) log_warn "Unknown argument: $1"; shift ;; @@ -182,12 +189,15 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do TOOLBOX_MODEL_PATH="/run/host${TOOLBOX_MODEL_PATH}" fi - # Standard test (pp512 + tg128, default context) - OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log" + # Standard test + local_suffix="fa1" + [[ "$PP_TOKENS" != "512" || "$TG_TOKENS" != "128" ]] && local_suffix="fa1__pp${PP_TOKENS}_tg${TG_TOKENS}" + OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}.log" if [[ ! 
-s "$OUT" ]]; then - printf "\n${BOLD}>> [%s] %s — standard test${RESET}\n" "$BACKEND" "$MODEL_NAME" + printf "\n${BOLD}>> [%s] %s — pp%s/tg%s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$PP_TOKENS" "$TG_TOKENS" CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN" - -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1 -r "$REPS_STANDARD") + -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1 + -p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD") printf " cmd: %s\n" "${CMD[*]}" if "${CMD[@]}" > "$OUT" 2>&1; then diff --git a/scripts/benchmark/run-suite.sh b/scripts/benchmark/run-suite.sh index 94a2c29..7049298 100644 --- a/scripts/benchmark/run-suite.sh +++ b/scripts/benchmark/run-suite.sh @@ -18,6 +18,8 @@ REPS_STANDARD=5 REPS_LONGCTX=3 CTX_DEPTH=32768 CTX_PROMPT=2048 +PP_TOKENS=512 +TG_TOKENS=128 while [[ $# -gt 0 ]]; do case "$1" in @@ -29,6 +31,8 @@ while [[ $# -gt 0 ]]; do --category|-c) CATEGORY_FILTER="$2"; shift 2 ;; --reps|-r) REPS_STANDARD="$2"; shift 2 ;; --context|-d) CTX_DEPTH="$2"; shift 2 ;; + --pp) PP_TOKENS="$2"; shift 2 ;; + --tg) TG_TOKENS="$2"; shift 2 ;; --help|-h) echo "Usage: run-suite.sh [OPTIONS]" echo "" @@ -41,9 +45,12 @@ while [[ $# -gt 0 ]]; do echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)" echo " --reps N Standard test repetitions (default: 5)" echo " --context N Long-context depth in tokens (default: 32768)" + echo " --pp N Prompt processing tokens (default: 512)" + echo " --tg N Token generation count (default: 128)" echo "" echo "Examples:" echo " run-suite.sh --tag ctx128k --context 131072 --category moe" + echo " run-suite.sh --tag realistic --tg 1024 --pp 2048 --category moe" echo " run-suite.sh --tag post-opt --max-size 20 --skip-longctx" exit 0 ;; *) log_warn "Unknown argument: $1"; shift ;; @@ -170,11 +177,14 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do fi # Standard test - OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1.log" + local_suffix="fa1" + [[ "$PP_TOKENS" != "512" || "$TG_TOKENS" != "128" ]] && 
local_suffix="fa1__pp${PP_TOKENS}_tg${TG_TOKENS}" + OUT="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__${local_suffix}.log" if [[ ! -s "$OUT" ]]; then - printf "\n${BOLD}>> [%s] %s — standard${RESET}\n" "$BACKEND" "$MODEL_NAME" + printf "\n${BOLD}>> [%s] %s — pp%s/tg%s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$PP_TOKENS" "$TG_TOKENS" CMD=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN" - -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1 -r "$REPS_STANDARD") + -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1 + -p "$PP_TOKENS" -n "$TG_TOKENS" -r "$REPS_STANDARD") if "${CMD[@]}" > "$OUT" 2>&1; then log_success "Done"; tail -3 "$OUT" else