feat: add Qwen3.5 model catalog and agentic evaluation framework
Models:
- configs/models.conf: catalog with Qwen3.5-35B-A3B (MoE, top pick), Qwen3.5-27B (dense), Qwen3-Coder-30B-A3B (agentic/coding)
- Updated benchmark setup to show catalog with download status
- docs/model-recommendations.md: memory planning, quantization guide

Agentic evaluation:
- scripts/agentic/setup.sh: installs inspect-ai, evalplus, bigcodebench in a Python venv
- scripts/agentic/run-eval.sh: runs evaluations against local LLM server (ollama or llama.cpp). Suites: quick (HumanEval+IFEval), code (EvalPlus+BigCodeBench), tooluse (BFCL), full (all)
- bin/agentic: dispatcher with help
- docs/agentic-benchmarks.md: methodology, framework comparison, model recommendations for agentic use

Updated: Makefile (6 new targets), README, CLAUDE.md, docs/references.md

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,13 +8,13 @@ source "$SCRIPT_DIR/../../lib/detect.sh"
|
||||
|
||||
TOOLBOXES_REPO="/data/workspace/projects/HomeLab/strix-halo-toolboxes/amd-strix-halo-llamacpp-toolboxes"
|
||||
MODEL_DIR="$(data_dir models)"
|
||||
MODEL_CATALOG="$PROJECT_ROOT/configs/models.conf"
|
||||
|
||||
log_header "Benchmark Setup"
|
||||
|
||||
# ── 1. Check toolbox containers ──────────────────────────
|
||||
log_info "Checking toolbox containers..."
|
||||
|
||||
# Minimum required: vulkan-radv (most stable)
|
||||
REQUIRED_TOOLBOXES=("llama-vulkan-radv")
|
||||
OPTIONAL_TOOLBOXES=("llama-rocm-6.4.4" "llama-rocm-7.2" "llama-vulkan-amdvlk")
|
||||
|
||||
@@ -22,7 +22,7 @@ existing=$(detect_toolbox_names 2>/dev/null || true)
|
||||
missing=()
|
||||
|
||||
for tb in "${REQUIRED_TOOLBOXES[@]}"; do
|
||||
if echo "$existing" | grep -q "^${tb}$"; then
|
||||
if echo "$existing" | grep -qF "$tb"; then
|
||||
log_success "Toolbox: $tb"
|
||||
else
|
||||
missing+=("$tb")
|
||||
@@ -31,7 +31,7 @@ for tb in "${REQUIRED_TOOLBOXES[@]}"; do
|
||||
done
|
||||
|
||||
for tb in "${OPTIONAL_TOOLBOXES[@]}"; do
|
||||
if echo "$existing" | grep -q "^${tb}$"; then
|
||||
if echo "$existing" | grep -qF "$tb"; then
|
||||
log_success "Toolbox: $tb (optional)"
|
||||
else
|
||||
log_info "Toolbox not present: $tb (optional)"
|
||||
@@ -80,26 +80,54 @@ if (( model_count > 0 )); then
|
||||
done
|
||||
else
|
||||
log_warn "No GGUF models found in $MODEL_DIR"
|
||||
log_info "Download a test model. Example:"
|
||||
fi
|
||||
|
||||
# ── 4. Show model catalog ───────────────────────────────
|
||||
log_header "Model Catalog"
|
||||
log_info "Available models (from configs/models.conf):"
|
||||
echo ""
|
||||
printf " ${BOLD}%-28s %-10s %-8s %s${RESET}\n" "Name" "Category" "Size" "Description"
|
||||
echo " $(printf '%.0s─' {1..70})"
|
||||
while IFS='|' read -r name repo file size_gb category desc; do
|
||||
[[ "$name" =~ ^#.*$ || -z "$name" ]] && continue
|
||||
local_file="$MODEL_DIR/$file"
|
||||
status=" "
|
||||
if [[ -f "$local_file" ]]; then
|
||||
status="*"
|
||||
fi
|
||||
printf " %s %-27s %-10s %4s GB %s\n" "$status" "$name" "$category" "$size_gb" "$desc"
|
||||
done < "$MODEL_CATALOG"
|
||||
echo ""
|
||||
echo " (* = downloaded)"
|
||||
echo ""
|
||||
|
||||
# ── 5. Offer downloads ──────────────────────────────────
|
||||
if is_cmd huggingface-cli; then
|
||||
log_info "Download models with:"
|
||||
echo ""
|
||||
echo " # Small (4B, ~3 GB):"
|
||||
echo " huggingface-cli download Qwen/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf \\"
|
||||
echo " --local-dir $MODEL_DIR"
|
||||
echo " # Recommended starter set:"
|
||||
echo " # Smoke test (3 GB):"
|
||||
echo " huggingface-cli download unsloth/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf --local-dir $MODEL_DIR"
|
||||
echo ""
|
||||
echo " # Medium (14B, ~9 GB):"
|
||||
echo " huggingface-cli download Qwen/Qwen3-14B-GGUF Qwen3-14B-Q4_K_M.gguf \\"
|
||||
echo " --local-dir $MODEL_DIR"
|
||||
echo " # Top pick — Qwen3.5-35B-A3B MoE Q8 (37 GB, ~85 t/s gen):"
|
||||
echo " huggingface-cli download unsloth/Qwen3.5-35B-A3B-GGUF Qwen3.5-35B-A3B-Q8_0.gguf --local-dir $MODEL_DIR"
|
||||
echo ""
|
||||
echo " # Agentic/coding — Qwen3-Coder-30B-A3B (18 GB, best for tool use):"
|
||||
echo " huggingface-cli download unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf --local-dir $MODEL_DIR"
|
||||
echo ""
|
||||
echo " # Or download any model from catalog:"
|
||||
echo " # huggingface-cli download REPO FILE --local-dir $MODEL_DIR"
|
||||
echo ""
|
||||
|
||||
if is_cmd huggingface-cli; then
|
||||
if confirm "Download Qwen3-4B Q4_K_M (~3 GB) as test model?"; then
|
||||
huggingface-cli download Qwen/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf \
|
||||
if (( model_count == 0 )); then
|
||||
if confirm "Download Qwen3-4B Q4_K_M (~3 GB) as smoke test model?"; then
|
||||
huggingface-cli download unsloth/Qwen3-4B-GGUF Qwen3-4B-Q4_K_M.gguf \
|
||||
--local-dir "$MODEL_DIR"
|
||||
log_success "Model downloaded"
|
||||
fi
|
||||
else
|
||||
log_info "Install huggingface-cli: pip install huggingface_hub[cli]"
|
||||
fi
|
||||
else
|
||||
log_info "Install huggingface-cli: pip install huggingface_hub[cli]"
|
||||
fi
|
||||
|
||||
log_header "Setup Complete"
|
||||
|
||||
Reference in New Issue
Block a user