feat: add benchmark filtering (--max-size, --category, --skip-longctx)
Both run-baseline.sh and run-suite.sh now support:
- --max-size GB: skip models larger than N GB (prevents OOM)
- --category LIST: filter by catalog category (smoke,dense,moe)
- --skip-longctx: skip 32K context tests (saves time + memory)
- --reps N: configure repetition count
- --help: shows usage with examples

Safe pre-optimization run: benchmark baseline --max-size 20 --skip-longctx
Full post-optimization: benchmark baseline (no filters, all models + longctx)

Also: 4 new BATS tests for flag parsing (98 total, all passing)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,12 +11,33 @@ MODEL_DIR="$(data_dir models)"
|
||||
TAG="run"
|
||||
BACKENDS_FILTER=""
|
||||
MODELS_FILTER=""
|
||||
SKIP_LONGCTX=false
|
||||
MAX_SIZE_GB=0
|
||||
CATEGORY_FILTER=""
|
||||
REPS_STANDARD=5
|
||||
REPS_LONGCTX=3
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--tag|-t) TAG="$2"; shift 2 ;;
|
||||
--backends|-b) BACKENDS_FILTER="$2"; shift 2 ;;
|
||||
--models|-m) MODELS_FILTER="$2"; shift 2 ;;
|
||||
--tag|-t) TAG="$2"; shift 2 ;;
|
||||
--backends|-b) BACKENDS_FILTER="$2"; shift 2 ;;
|
||||
--models|-m) MODELS_FILTER="$2"; shift 2 ;;
|
||||
--skip-longctx) SKIP_LONGCTX=true; shift ;;
|
||||
--max-size|-s) MAX_SIZE_GB="$2"; shift 2 ;;
|
||||
--category|-c) CATEGORY_FILTER="$2"; shift 2 ;;
|
||||
--reps|-r) REPS_STANDARD="$2"; shift 2 ;;
|
||||
--help|-h)
|
||||
echo "Usage: run-suite.sh [OPTIONS]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --tag NAME Tag this run (default: run)"
|
||||
echo " --backends LIST Comma-separated backend filter"
|
||||
echo " --models LIST Comma-separated model filename filter"
|
||||
echo " --skip-longctx Skip long-context (32K) tests"
|
||||
echo " --max-size GB Only bench models up to this file size in GB"
|
||||
echo " --category LIST Comma-separated: smoke,dense,moe (from models.conf)"
|
||||
echo " --reps N Standard test repetitions (default: 5)"
|
||||
exit 0 ;;
|
||||
*) log_warn "Unknown argument: $1"; shift ;;
|
||||
esac
|
||||
done
|
||||
@@ -25,9 +46,6 @@ TS="$(timestamp)"
|
||||
RESULT_DIR="$(data_dir benchmarks)/${TAG}-${TS}"
|
||||
mkdir -p "$RESULT_DIR"
|
||||
|
||||
REPS_STANDARD=5
|
||||
REPS_LONGCTX=3
|
||||
|
||||
log_header "Benchmark Suite: $TAG"
|
||||
log_info "Results: $RESULT_DIR"
|
||||
|
||||
@@ -60,26 +78,56 @@ if (( ${#available_backends[@]} == 0 )); then
|
||||
fi
|
||||
log_info "Backends: ${available_backends[*]}"
|
||||
|
||||
# Find models
|
||||
mapfile -t MODEL_PATHS < <(
|
||||
# Find and filter models
|
||||
mapfile -t ALL_MODEL_PATHS < <(
|
||||
find -L "$MODEL_DIR" -type f -name '*.gguf' -not -name 'mmproj-*' \
|
||||
\( -name '*-00001-of-*.gguf' -o -not -name '*-000*-of-*.gguf' \) \
|
||||
| sort
|
||||
)
|
||||
|
||||
if [[ -n "$MODELS_FILTER" ]]; then
|
||||
filtered=()
|
||||
for p in "${MODEL_PATHS[@]}"; do
|
||||
name="$(basename "$p")"
|
||||
if echo "$MODELS_FILTER" | tr ',' '\n' | grep -qi "$name"; then
|
||||
filtered+=("$p")
|
||||
MODEL_PATHS=()
|
||||
for p in "${ALL_MODEL_PATHS[@]}"; do
|
||||
local_name="$(basename "$p")"
|
||||
|
||||
# Name filter
|
||||
if [[ -n "$MODELS_FILTER" ]]; then
|
||||
if ! echo "$MODELS_FILTER" | tr ',' '\n' | grep -qi "$local_name"; then
|
||||
continue
|
||||
fi
|
||||
done
|
||||
MODEL_PATHS=("${filtered[@]}")
|
||||
fi
|
||||
fi
|
||||
|
||||
# Size filter
|
||||
if (( MAX_SIZE_GB > 0 )); then
|
||||
file_size_gb=$(( $(stat -Lc%s "$p" 2>/dev/null || echo 0) / 1073741824 ))
|
||||
if (( file_size_gb >= MAX_SIZE_GB )); then
|
||||
log_info "Skipping $local_name (${file_size_gb} GB > ${MAX_SIZE_GB} GB limit)"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
||||
# Category filter
|
||||
if [[ -n "$CATEGORY_FILTER" ]] && [[ -f "$PROJECT_ROOT/configs/models.conf" ]]; then
|
||||
matched=false
|
||||
found_in_catalog=false
|
||||
while IFS='|' read -r name repo file size_gb category desc; do
|
||||
[[ "$name" =~ ^#.*$ || -z "$name" ]] && continue
|
||||
if [[ "$local_name" == "$file" ]]; then
|
||||
found_in_catalog=true
|
||||
echo "$CATEGORY_FILTER" | tr ',' '\n' | grep -qF "$category" && matched=true
|
||||
break
|
||||
fi
|
||||
done < "$PROJECT_ROOT/configs/models.conf"
|
||||
if $found_in_catalog && ! $matched; then
|
||||
log_info "Skipping $local_name (category not in: $CATEGORY_FILTER)"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
||||
MODEL_PATHS+=("$p")
|
||||
done
|
||||
|
||||
if (( ${#MODEL_PATHS[@]} == 0 )); then
|
||||
log_error "No models found. Run: make benchmark-setup"
|
||||
log_error "No models matched filters. Run: make benchmark-setup"
|
||||
exit 1
|
||||
fi
|
||||
log_info "Models: ${#MODEL_PATHS[@]}"
|
||||
@@ -115,6 +163,9 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
|
||||
fi
|
||||
|
||||
# Long-context test
|
||||
if $SKIP_LONGCTX; then
|
||||
continue
|
||||
fi
|
||||
OUT_LC="$RESULT_DIR/${MODEL_NAME}__${BACKEND_SAFE}__fa1__longctx32768.log"
|
||||
if [[ ! -s "$OUT_LC" ]]; then
|
||||
printf "\n${BOLD}>> [%s] %s — longctx${RESET}\n" "$BACKEND" "$MODEL_NAME"
|
||||
|
||||
Reference in New Issue
Block a user