Add a --context/-d option to run-baseline.sh and run-suite.sh so the depth of
the long-context test is configurable (default 32768) instead of hard-coded.
For depths above 32768 the prompt size becomes depth/16; otherwise it stays at
2048. The option value is checked to be a non-negative decimal integer before
it is stored, because it is later evaluated inside (( )).
NOTE(review): the "index" blob ids below are carried over from the original
patch and should be regenerated; context-line indentation is reconstructed.

diff --git a/scripts/benchmark/run-baseline.sh b/scripts/benchmark/run-baseline.sh
index 40ff8ae..ec6bbda 100644
--- a/scripts/benchmark/run-baseline.sh
+++ b/scripts/benchmark/run-baseline.sh
@@ -17,6 +17,8 @@ REPS_LONGCTX=3
 SKIP_LONGCTX=false
 MAX_SIZE_GB=0 # 0 = no limit
 CATEGORY_FILTER=""
+CTX_DEPTH=32768   # long-context depth in tokens (--context)
+CTX_PROMPT=2048   # prompt tokens for the long-context run
 
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -24,24 +26,40 @@ while [[ $# -gt 0 ]]; do
     --max-size|-s) MAX_SIZE_GB="$2"; shift 2 ;;
     --category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
     --reps|-r) REPS_STANDARD="$2"; shift 2 ;;
+    --context|-d)
+      # Accept only a non-negative decimal integer (no sign, no leading zero):
+      # the value is compared inside (( )) after option parsing, where other
+      # text would be evaluated as an arithmetic expression (or as octal).
+      if [[ ! "${2:-}" =~ ^(0|[1-9][0-9]*)$ ]]; then
+        echo "Error: --context requires a non-negative integer, got '${2:-}'" >&2
+        exit 1
+      fi
+      CTX_DEPTH="$2"; shift 2 ;;
     --help|-h)
       echo "Usage: run-baseline.sh [OPTIONS]"
       echo ""
       echo "Options:"
-      echo " --skip-longctx Skip long-context (32K) tests"
+      echo " --skip-longctx Skip long-context tests"
       echo " --max-size GB Only bench models up to this file size in GB"
       echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
       echo " --reps N Standard test repetitions (default: 5)"
+      echo " --context N Long-context depth in tokens (default: 32768)"
       echo ""
       echo "Examples:"
       echo " run-baseline.sh --max-size 20 # Only models ≤20 GB"
-      echo " run-baseline.sh --category smoke,moe # Only smoke + MoE models"
+      echo " run-baseline.sh --context 131072 --category moe # 128K context on MoE"
       echo " run-baseline.sh --skip-longctx --max-size 15 # Quick safe run"
       exit 0 ;;
     *) log_warn "Unknown argument: $1"; shift ;;
   esac
 done
 
+# Scale prompt tokens for large contexts: depth/16 once depth exceeds 32768.
+# Depths up to 32768 keep the 2048 default, so CTX_PROMPT is never below 2048.
+if (( CTX_DEPTH > 32768 )); then
+  CTX_PROMPT=$(( CTX_DEPTH / 16 ))
+fi
+
 log_header "Baseline Benchmark Capture"
 log_info "Results will be saved to: $RESULT_DIR"
 $SKIP_LONGCTX && log_info "Long-context tests: SKIPPED"
@@ -196,16 +214,16 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
       continue
     fi
 
-    OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
+    OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}.log"
     if [[ ! -s "$OUT_LC" ]]; then
-      printf "\n${BOLD}>> [%s] %s — long-context test${RESET}\n" "$BACKEND" "$MODEL_NAME"
+      printf "\n${BOLD}>> [%s] %s — long-context %s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH"
       UB_SIZE=2048
       [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
 
       CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
         -ngl 99 -mmp 0
         -m "$TOOLBOX_MODEL_PATH" -fa 1
-        -p 2048 -n 32 -d 32768 -ub "$UB_SIZE"
+        -p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE"
         -r "$REPS_LONGCTX")
 
       printf " cmd: %s\n" "${CMD_LC[*]}"
diff --git a/scripts/benchmark/run-suite.sh b/scripts/benchmark/run-suite.sh
index ebca5c0..94a2c29 100644
--- a/scripts/benchmark/run-suite.sh
+++ b/scripts/benchmark/run-suite.sh
@@ -16,6 +16,8 @@ MAX_SIZE_GB=0
 CATEGORY_FILTER=""
 REPS_STANDARD=5
 REPS_LONGCTX=3
+CTX_DEPTH=32768   # long-context depth in tokens (--context)
+CTX_PROMPT=2048   # prompt tokens for the long-context run
 
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -26,6 +28,15 @@ while [[ $# -gt 0 ]]; do
     --max-size|-s) MAX_SIZE_GB="$2"; shift 2 ;;
     --category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
     --reps|-r) REPS_STANDARD="$2"; shift 2 ;;
+    --context|-d)
+      # Accept only a non-negative decimal integer (no sign, no leading zero):
+      # the value is compared inside (( )) after option parsing, where other
+      # text would be evaluated as an arithmetic expression (or as octal).
+      if [[ ! "${2:-}" =~ ^(0|[1-9][0-9]*)$ ]]; then
+        echo "Error: --context requires a non-negative integer, got '${2:-}'" >&2
+        exit 1
+      fi
+      CTX_DEPTH="$2"; shift 2 ;;
     --help|-h)
       echo "Usage: run-suite.sh [OPTIONS]"
       echo ""
@@ -33,15 +44,26 @@ while [[ $# -gt 0 ]]; do
       echo " --tag NAME Tag this run (default: run)"
       echo " --backends LIST Comma-separated backend filter"
       echo " --models LIST Comma-separated model filename filter"
-      echo " --skip-longctx Skip long-context (32K) tests"
+      echo " --skip-longctx Skip long-context tests"
       echo " --max-size GB Only bench models up to this file size in GB"
       echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
       echo " --reps N Standard test repetitions (default: 5)"
+      echo " --context N Long-context depth in tokens (default: 32768)"
+      echo ""
+      echo "Examples:"
+      echo " run-suite.sh --tag ctx128k --context 131072 --category moe"
+      echo " run-suite.sh --tag post-opt --max-size 20 --skip-longctx"
       exit 0 ;;
     *) log_warn "Unknown argument: $1"; shift ;;
   esac
 done
 
+# Scale prompt tokens for large contexts: depth/16 once depth exceeds 32768.
+# Depths up to 32768 keep the 2048 default, so CTX_PROMPT is never below 2048.
+if (( CTX_DEPTH > 32768 )); then
+  CTX_PROMPT=$(( CTX_DEPTH / 16 ))
+fi
+
 TS="$(timestamp)"
 RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
 mkdir -p "$RESULT_DIR"
@@ -172,13 +194,13 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
     if $SKIP_LONGCTX; then
       continue
     fi
-    OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
+    OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}.log"
     if [[ ! -s "$OUT_LC" ]]; then
-      printf "\n${BOLD}>> [%s] %s — longctx${RESET}\n" "$BACKEND" "$MODEL_NAME"
+      printf "\n${BOLD}>> [%s] %s — longctx %s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH"
       UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
       CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
         -ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
-        -p 2048 -n 32 -d 32768 -ub "$UB_SIZE" -r "$REPS_LONGCTX")
+        -p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE" -r "$REPS_LONGCTX")
       if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
         log_success "Done"; tail -3 "$OUT_LC"
       else