feat(serve): set APEX I-Compact as default, harden benchmark workflow
Serving:
- make serve now launches Claude-distilled APEX 35B-A3B (16GB) with 2 parallel slots and 256K context as the daily driver
- add serve-custom for ad-hoc model testing
- add flush-gpu to reclaim unified memory after stuck runs

Benchmarks:
- default Vulkan-only backends (ROCm trails at long context)
- add --backends filter to run-baseline.sh
- fix backend filter substring bug (grep -qFx for exact line match)
- fix model filter regex metacharacter bug (grep -qiF for literal)
- respect --tg in long-context tests instead of hardcoded n=32

ROCm bump to 7.2.1 (kernel 6.18.4+ patch); keep 7.2 as optional.

Catalog:
- add mudler APEX I-Compact (Claude-distilled 35B, 17GB)
- add 0xSero REAP-40 (pruned 122B-A10B, 46GB)
- update download instructions: hf download (huggingface-cli is gone)
This commit is contained in:
@@ -9,7 +9,7 @@ source "$SCRIPT_DIR/../../lib/format.sh"
|
||||
|
||||
MODEL_DIR="$(data_dir models)"
|
||||
TAG="run"
|
||||
BACKENDS_FILTER=""
|
||||
BACKENDS_FILTER="llama-vulkan-radv"
|
||||
MODELS_FILTER=""
|
||||
SKIP_LONGCTX=false
|
||||
MAX_SIZE_GB=0
|
||||
@@ -99,13 +99,14 @@ declare -A BENCH_PATHS=(
|
||||
[llama-vulkan-amdvlk]="/usr/sbin/llama-bench"
|
||||
[llama-rocm-6.4.4]="/usr/local/bin/llama-bench"
|
||||
[llama-rocm-7.2]="/usr/local/bin/llama-bench"
|
||||
[llama-rocm-7.2.1]="/usr/local/bin/llama-bench"
|
||||
[llama-rocm7-nightlies]="/usr/local/bin/llama-bench"
|
||||
)
|
||||
|
||||
available_backends=()
|
||||
for tb in "${!BENCH_PATHS[@]}"; do
|
||||
if echo "$existing" | grep -q "^${tb}$"; then
|
||||
if [[ -z "$BACKENDS_FILTER" ]] || echo "$BACKENDS_FILTER" | tr ',' '\n' | grep -q "$tb"; then
|
||||
if [[ -z "$BACKENDS_FILTER" ]] || echo "$BACKENDS_FILTER" | tr ',' '\n' | grep -qFx "$tb"; then
|
||||
available_backends+=("$tb")
|
||||
fi
|
||||
fi
|
||||
@@ -130,7 +131,7 @@ for p in "${ALL_MODEL_PATHS[@]}"; do
|
||||
|
||||
# Name filter
|
||||
if [[ -n "$MODELS_FILTER" ]]; then
|
||||
if ! echo "$MODELS_FILTER" | tr ',' '\n' | grep -qi "$local_name"; then
|
||||
if ! echo "$MODELS_FILTER" | tr ',' '\n' | grep -qiF "$local_name"; then
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
@@ -252,7 +253,7 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
|
||||
UB_SIZE=2048; [[ "$BACKEND" == *vulkan* ]] && UB_SIZE=512
|
||||
CMD_LC=(toolbox run -c "$BACKEND" -- "${ENV_ARGS[@]}" "$BENCH_BIN"
|
||||
-ngl 99 -mmp 0 -m "$TOOLBOX_MODEL_PATH" -fa 1
|
||||
-p "$CTX_PROMPT" -n 32 -d "$CTX_DEPTH" -ub "$UB_SIZE" -r "$REPS_LONGCTX" "${BATCH_ARGS[@]}" "${KV_ARGS[@]}")
|
||||
-p "$CTX_PROMPT" -n "$TG_TOKENS" -d "$CTX_DEPTH" -ub "$UB_SIZE" -r "$REPS_LONGCTX" "${BATCH_ARGS[@]}" "${KV_ARGS[@]}")
|
||||
if "${CMD_LC[@]}" > "$OUT_LC" 2>&1; then
|
||||
log_success "Done"; tail -3 "$OUT_LC"
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user