feat: add --context flag for configurable long-context benchmarks
Both run-baseline.sh and run-suite.sh now accept --context N to set the long-context depth (default: 32768). Prompt tokens auto-scale to ~1/16 of the context depth when the depth exceeds 32768; otherwise the 2048-token prompt is kept.

Examples:
  benchmark run --tag ctx64k --context 65536 --category moe
  benchmark run --tag ctx128k --context 131072 --category moe --reps 3

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,8 @@ REPS_LONGCTX=3
|
|||||||
SKIP_LONGCTX=false
|
SKIP_LONGCTX=false
|
||||||
MAX_SIZE_GB=0 # 0 = no limit
|
MAX_SIZE_GB=0 # 0 = no limit
|
||||||
CATEGORY_FILTER=""
|
CATEGORY_FILTER=""
|
||||||
|
CTX_DEPTH=32768
|
||||||
|
CTX_PROMPT=2048
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
@@ -24,24 +26,32 @@ while [[ $# -gt 0 ]]; do
|
|||||||
--max-size|-s) MAX_SIZE_GB="$2"; shift 2 ;;
|
--max-size|-s) MAX_SIZE_GB="$2"; shift 2 ;;
|
||||||
--category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
|
--category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
|
||||||
--reps|-r) REPS_STANDARD="$2"; shift 2 ;;
|
--reps|-r) REPS_STANDARD="$2"; shift 2 ;;
|
||||||
|
--context|-d) CTX_DEPTH="$2"; shift 2 ;;
|
||||||
--help|-h)
|
--help|-h)
|
||||||
echo "Usage: run-baseline.sh [OPTIONS]"
|
echo "Usage: run-baseline.sh [OPTIONS]"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Options:"
|
echo "Options:"
|
||||||
echo " --skip-longctx Skip long-context (32K) tests"
|
echo " --skip-longctx Skip long-context tests"
|
||||||
echo " --max-size GB Only bench models up to this file size in GB"
|
echo " --max-size GB Only bench models up to this file size in GB"
|
||||||
echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
|
echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
|
||||||
echo " --reps N Standard test repetitions (default: 5)"
|
echo " --reps N Standard test repetitions (default: 5)"
|
||||||
|
echo " --context N Long-context depth in tokens (default: 32768)"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Examples:"
|
echo "Examples:"
|
||||||
echo " run-baseline.sh --max-size 20 # Only models ≤20 GB"
|
echo " run-baseline.sh --max-size 20 # Only models ≤20 GB"
|
||||||
echo " run-baseline.sh --category smoke,moe # Only smoke + MoE models"
|
echo " run-baseline.sh --context 131072 --category moe # 128K context on MoE"
|
||||||
echo " run-baseline.sh --skip-longctx --max-size 15 # Quick safe run"
|
echo " run-baseline.sh --skip-longctx --max-size 15 # Quick safe run"
|
||||||
exit 0 ;;
|
exit 0 ;;
|
||||||
*) log_warn "Unknown argument: $1"; shift ;;
|
*) log_warn "Unknown argument: $1"; shift ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Scale prompt tokens only for depths above 32768: ~1/16 of depth (512 is a defensive floor; at or below 32768 the 2048 default is kept)
|
||||||
|
if (( CTX_DEPTH > 32768 )); then
|
||||||
|
CTX_PROMPT=$(( CTX_DEPTH / 16 ))
|
||||||
|
(( CTX_PROMPT < 512 )) && CTX_PROMPT=512
|
||||||
|
fi
|
||||||
|
|
||||||
log_header "Baseline Benchmark Capture"
|
log_header "Baseline Benchmark Capture"
|
||||||
log_info "Results will be saved to: $RESULT_DIR"
|
log_info "Results will be saved to: $RESULT_DIR"
|
||||||
$SKIP_LONGCTX && log_info "Long-context tests: SKIPPED"
|
$SKIP_LONGCTX && log_info "Long-context tests: SKIPPED"
|
||||||
@@ -196,16 +206,16 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
|
|||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
|
OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}.log"
|
||||||
if [[ ! -s "$OUT_LC" ]]; then
|
if [[ ! -s "$OUT_LC" ]]; then
|
||||||
printf "\n${BOLD}>> [%s] %s — long-context test${RESET}\n" "$BACKEND" "$MODEL_NAME"
|
printf "\n${BOLD}>> [%s] %s — long-context %s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH"
|
||||||
|
|
||||||
UB_SIZE=2048
|
UB_SIZE=2048
|
||||||
[[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
|
[[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
|
||||||
|
|
||||||
CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
|
CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
|
||||||
-ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
|
-ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
|
||||||
-p 2048 -n 32 -d 32768 -ub "$UB_SIZE"
|
-p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE"
|
||||||
-r "$REPS_LONGCTX")
|
-r "$REPS_LONGCTX")
|
||||||
|
|
||||||
printf " cmd: %s\n" "${CMD_LC[*]}"
|
printf " cmd: %s\n" "${CMD_LC[*]}"
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ MAX_SIZE_GB=0
|
|||||||
CATEGORY_FILTER=""
|
CATEGORY_FILTER=""
|
||||||
REPS_STANDARD=5
|
REPS_STANDARD=5
|
||||||
REPS_LONGCTX=3
|
REPS_LONGCTX=3
|
||||||
|
CTX_DEPTH=32768
|
||||||
|
CTX_PROMPT=2048
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
@@ -26,6 +28,7 @@ while [[ $# -gt 0 ]]; do
|
|||||||
--max-size|-s) MAX_SIZE_GB="$2"; shift 2 ;;
|
--max-size|-s) MAX_SIZE_GB="$2"; shift 2 ;;
|
||||||
--category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
|
--category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
|
||||||
--reps|-r) REPS_STANDARD="$2"; shift 2 ;;
|
--reps|-r) REPS_STANDARD="$2"; shift 2 ;;
|
||||||
|
--context|-d) CTX_DEPTH="$2"; shift 2 ;;
|
||||||
--help|-h)
|
--help|-h)
|
||||||
echo "Usage: run-suite.sh [OPTIONS]"
|
echo "Usage: run-suite.sh [OPTIONS]"
|
||||||
echo ""
|
echo ""
|
||||||
@@ -33,15 +36,26 @@ while [[ $# -gt 0 ]]; do
|
|||||||
echo " --tag NAME Tag this run (default: run)"
|
echo " --tag NAME Tag this run (default: run)"
|
||||||
echo " --backends LIST Comma-separated backend filter"
|
echo " --backends LIST Comma-separated backend filter"
|
||||||
echo " --models LIST Comma-separated model filename filter"
|
echo " --models LIST Comma-separated model filename filter"
|
||||||
echo " --skip-longctx Skip long-context (32K) tests"
|
echo " --skip-longctx Skip long-context tests"
|
||||||
echo " --max-size GB Only bench models up to this file size in GB"
|
echo " --max-size GB Only bench models up to this file size in GB"
|
||||||
echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
|
echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
|
||||||
echo " --reps N Standard test repetitions (default: 5)"
|
echo " --reps N Standard test repetitions (default: 5)"
|
||||||
|
echo " --context N Long-context depth in tokens (default: 32768)"
|
||||||
|
echo ""
|
||||||
|
echo "Examples:"
|
||||||
|
echo " run-suite.sh --tag ctx128k --context 131072 --category moe"
|
||||||
|
echo " run-suite.sh --tag post-opt --max-size 20 --skip-longctx"
|
||||||
exit 0 ;;
|
exit 0 ;;
|
||||||
*) log_warn "Unknown argument: $1"; shift ;;
|
*) log_warn "Unknown argument: $1"; shift ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Scale prompt tokens only for depths above 32768: ~1/16 of context depth (512 is a defensive floor; at or below 32768 the 2048 default is kept)
|
||||||
|
if (( CTX_DEPTH > 32768 )); then
|
||||||
|
CTX_PROMPT=$(( CTX_DEPTH / 16 ))
|
||||||
|
(( CTX_PROMPT < 512 )) && CTX_PROMPT=512
|
||||||
|
fi
|
||||||
|
|
||||||
TS="$(timestamp)"
|
TS="$(timestamp)"
|
||||||
RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
|
RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
|
||||||
mkdir -p "$RESULT_DIR"
|
mkdir -p "$RESULT_DIR"
|
||||||
@@ -172,13 +186,13 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
|
|||||||
if $SKIP_LONGCTX; then
|
if $SKIP_LONGCTX; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
|
OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx${CTX_DEPTH}.log"
|
||||||
if [[ ! -s "$OUT_LC" ]]; then
|
if [[ ! -s "$OUT_LC" ]]; then
|
||||||
printf "\n${BOLD}>> [%s] %s — longctx${RESET}\n" "$BACKEND" "$MODEL_NAME"
|
printf "\n${BOLD}>> [%s] %s — longctx %s${RESET}\n" "$BACKEND" "$MODEL_NAME" "$CTX_DEPTH"
|
||||||
UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
|
UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
|
||||||
CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
|
CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
|
||||||
-ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
|
-ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
|
||||||
-p 2048 -n 32 -d 32768 -ub "$UB_SIZE" -r "$REPS_LONGCTX")
|
-p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE" -r "$REPS_LONGCTX")
|
||||||
if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
|
if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
|
||||||
log_success "Done"; tail -3 "$OUT_LC"
|
log_success "Done"; tail -3 "$OUT_LC"
|
||||||
else
|
else
|
||||||
|
|||||||
Reference in New Issue
Block a user