chore: update model catalog with gemma 4, opus distill, and hw-bandwidth target

This commit is contained in:
Felipe Cardoso
2026-04-03 20:03:53 +02:00
parent 6ab08537ca
commit 474d94a07e
2 changed files with 15 additions and 4 deletions

View File

@@ -45,6 +45,10 @@ serve: ## Launch llama-server with optimized settings (ARGS="-m MODEL.gguf")
# serve-ngram: run bin/serve under bash with the --ngram flag, forwarding any
# extra command-line arguments supplied through ARGS (e.g. ARGS="-m MODEL.gguf").
# ARGS is intentionally left unquoted so that it can expand to several words.
# Declared phony because the target names a command, not a file; this is
# harmless if the target is already listed in another .PHONY declaration.
.PHONY: serve-ngram
serve-ngram: ## Launch with n-gram speculative decoding (ARGS="-m MODEL.gguf")
	@bash bin/serve --ngram $(ARGS)
# --- Hardware Info ---
# hw-bandwidth: run the external `clpeak` tool and merge its stderr into stdout
# (2>&1) so all of its output arrives on a single stream.
# The first recipe line checks that clpeak is on PATH and fails with a clear
# message (exit status 127, the same status the shell uses for "command not
# found") instead of a bare shell error.
# Declared phony because the target names a command, not a file; this is
# harmless if the target is already listed in another .PHONY declaration.
.PHONY: hw-bandwidth
hw-bandwidth: ## Measure GPU memory bandwidth and compute (clpeak)
	@command -v clpeak >/dev/null 2>&1 || { echo "error: clpeak not found in PATH; install it to run hw-bandwidth" >&2; exit 127; }
	@clpeak 2>&1
# --- Optimize ---
# optimize: run bin/optimize under bash with the --all flag.
# Declared phony because the target names a command, not a file; this is
# harmless if the target is already listed in another .PHONY declaration.
.PHONY: optimize
optimize: ## Interactive optimization walkthrough
	@bash bin/optimize --all