chore: update model catalog with gemma 4, opus distill, and hw-bandwidth target
This commit is contained in:
4
Makefile
4
Makefile
@@ -45,6 +45,10 @@ serve: ## Launch llama-server with optimized settings (ARGS="-m MODEL.gguf")
|
||||
serve-ngram: ## Launch with n-gram speculative decoding (ARGS="-m MODEL.gguf")
|
||||
@bash bin/serve --ngram $(ARGS)
|
||||
|
||||
# --- Hardware Info ---
|
||||
hw-bandwidth: ## Measure GPU memory bandwidth and compute (clpeak)
|
||||
@clpeak 2>&1
|
||||
|
||||
# --- Optimize ---
|
||||
optimize: ## Interactive optimization walkthrough
|
||||
@bash bin/optimize --all
|
||||
|
||||
Reference in New Issue
Block a user