From eb52ea52ceab623a10be9dd422ab2db6a7f8a8e3 Mon Sep 17 00:00:00 2001 From: Felipe Cardoso Date: Thu, 26 Mar 2026 09:44:16 +0100 Subject: [PATCH] fix: follow symlinks in model discovery, update model catalog - Add -L flag to find in benchmark scripts (follows symlinks to /data/models/llms/) - Exclude mmproj-*.gguf (vision projection files, not LLM models) - Update configs/models.conf: remove Qwen3-Coder (user prefers Qwen3.5-35B-A3B), drop the Qwen3.5-27B Q8_0 variant while keeping Qwen3.5-27B-Q4_K_M (download needed), reflect actual downloaded models Co-Authored-By: Claude Opus 4.6 (1M context) --- configs/models.conf | 27 ++++++++++++++------------- scripts/benchmark/run-baseline.sh | 2 +- scripts/benchmark/run-suite.sh | 2 +- scripts/benchmark/setup.sh | 4 ++-- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/configs/models.conf b/configs/models.conf index 3d6696f..1edfba4 100644 --- a/configs/models.conf +++ b/configs/models.conf @@ -1,22 +1,23 @@ # Model catalog for benchmarking # Format: NAME|HF_REPO|FILE|SIZE_GB|CATEGORY|DESCRIPTION # -# Categories: smoke, standard, moe, dense, coding, agentic -# Download with: huggingface-cli download REPO FILE --local-dir data/models +# Categories: smoke, standard, moe, dense +# Download with: huggingface-cli download REPO FILE --local-dir /data/models/llms/REPO # ── Smoke tests (quick, small) ─────────────────────────── -qwen3-4b|unsloth/Qwen3-4B-GGUF|Qwen3-4B-Q4_K_M.gguf|3|smoke|Quick validation +qwen3.5-0.8b-q8|unsloth/Qwen3.5-0.8B-GGUF|Qwen3.5-0.8B-Q8_0.gguf|0.8|smoke|Tiny, Q8 full precision +qwen3.5-2b-q4|unsloth/Qwen3.5-2B-GGUF|Qwen3.5-2B-Q4_K_S.gguf|1.2|smoke|Small dense 2B +qwen3.5-4b-q4|unsloth/Qwen3.5-4B-GGUF|Qwen3.5-4B-Q4_K_S.gguf|2.5|smoke|Small dense 4B -# ── Standard benchmarks ────────────────────────────────── -qwen3-14b|unsloth/Qwen3-14B-GGUF|Qwen3-14B-Q4_K_M.gguf|9|standard|Standard test model +# ── Standard dense models ──────────────────────────────── +qwen3.5-9b-q4|unsloth/Qwen3.5-9B-GGUF|Qwen3.5-9B-Q4_K_S.gguf|5.1|dense|Dense 9B 
+gpt-oss-20b-mxfp4|lmstudio-community/gpt-oss-20b-GGUF|gpt-oss-20b-MXFP4.gguf|12|dense|GPT-OSS 20B MXFP4 +glm-4.7-flash-q6|lmstudio-community/GLM-4.7-Flash-GGUF|GLM-4.7-Flash-Q6_K.gguf|23|dense|GLM 4.7 Flash Q6 -# ── Qwen3.5 MoE models (fast generation, best for 64GB) ─ -qwen3.5-35b-a3b-q8|unsloth/Qwen3.5-35B-A3B-GGUF|Qwen3.5-35B-A3B-Q8_0.gguf|37|moe|Top pick: near-full precision, 3B active -qwen3.5-35b-a3b-q4|unsloth/Qwen3.5-35B-A3B-GGUF|Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf|22|moe|Best quality/size ratio, 3B active - -# ── Qwen3.5 dense models ──────────────────────────────── +# ── Qwen3.5-27B dense (download needed) ───────────────── qwen3.5-27b-q4|unsloth/Qwen3.5-27B-GGUF|Qwen3.5-27B-Q4_K_M.gguf|17|dense|Dense 27B, quality-first -qwen3.5-27b-q8|unsloth/Qwen3.5-27B-GGUF|Qwen3.5-27B-Q8_0.gguf|29|dense|Dense 27B, max quality -# ── Coding / agentic models ───────────────────────────── -qwen3-coder-30b-a3b|unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF|Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf|18|agentic|Best for tool use + coding, 3B active +# ── MoE models (fast generation, best for 64GB) ───────── +qwen3.5-35b-a3b-q4|unsloth/Qwen3.5-35B-A3B-GGUF|Qwen3.5-35B-A3B-UD-Q4_K_L.gguf|19|moe|MoE 35B, 3B active, Unsloth dynamic +qwen3.5-35b-a3b-q8|unsloth/Qwen3.5-35B-A3B-GGUF|Qwen3.5-35B-A3B-Q8_0.gguf|37|moe|MoE 35B Q8, near-full precision +nemotron-30b-a3b-q4|lmstudio-community/NVIDIA-Nemotron-3-Nano-30B-A3B-GGUF|NVIDIA-Nemotron-3-Nano-30B-A3B-Q4_K_M.gguf|23|moe|Nemotron MoE 30B, 3B active diff --git a/scripts/benchmark/run-baseline.sh b/scripts/benchmark/run-baseline.sh index 5e93efc..928e0e7 100644 --- a/scripts/benchmark/run-baseline.sh +++ b/scripts/benchmark/run-baseline.sh @@ -49,7 +49,7 @@ fi # Find models mapfile -t MODEL_PATHS < <( - find "$MODEL_DIR" -type f -name '*.gguf' \ + find -L "$MODEL_DIR" -type f -name '*.gguf' -not -name 'mmproj-*' \ \( -name '*-00001-of-*.gguf' -o -not -name '*-000*-of-*.gguf' \) \ | sort ) diff --git a/scripts/benchmark/run-suite.sh 
b/scripts/benchmark/run-suite.sh index 0a385fe..e18f213 100644 --- a/scripts/benchmark/run-suite.sh +++ b/scripts/benchmark/run-suite.sh @@ -62,7 +62,7 @@ log_info "Backends: ${available_backends[*]}" # Find models mapfile -t MODEL_PATHS < <( - find "$MODEL_DIR" -type f -name '*.gguf' \ + find -L "$MODEL_DIR" -type f -name '*.gguf' -not -name 'mmproj-*' \ \( -name '*-00001-of-*.gguf' -o -not -name '*-000*-of-*.gguf' \) \ | sort ) diff --git a/scripts/benchmark/setup.sh b/scripts/benchmark/setup.sh index e9ef44d..5bfd412 100644 --- a/scripts/benchmark/setup.sh +++ b/scripts/benchmark/setup.sh @@ -71,10 +71,10 @@ done # ── 3. Check for test models ──────────────────────────── log_info "Checking for test models in $MODEL_DIR..." -model_count=$(find "$MODEL_DIR" -name "*.gguf" 2>/dev/null | wc -l) +model_count=$(find -L "$MODEL_DIR" -name "*.gguf" -not -name "mmproj-*" 2>/dev/null | wc -l) if (( model_count > 0 )); then log_success "Found $model_count model(s):" - find "$MODEL_DIR" -name "*.gguf" | while read -r f; do + find -L "$MODEL_DIR" -name "*.gguf" -not -name "mmproj-*" | while read -r f; do size=$(du -h "$f" | cut -f1) printf " %s (%s)\n" "$(basename "$f")" "$size" done