fix: address code review findings — batch args, venv path, serve flags
- Fix missing BATCH_ARGS in long-context commands (both benchmark scripts)
- Fix CLAUDE.md stale venv path (data/venv → .venv) and add serve/power docs
- Add -b/--batch to bin/benchmark help text
- Add --no-think flag to serve script (--reasoning-budget 0)
- Sanitize model names in eval run directories
- Simplify agentic setup to use requirements.txt
- Add serve --help test, batch flag assertions to existing tests
- Add requirements.txt for reproducible venv setup (Python 3.13)
This commit is contained in:
@@ -11,6 +11,7 @@ load test_helper.sh
|
||||
assert_output --partial "--category"
|
||||
assert_output --partial "--skip-longctx"
|
||||
assert_output --partial "--kv-types"
|
||||
assert_output --partial "--batch"
|
||||
}
|
||||
|
||||
@test "run-suite --help shows usage and exits 0" {
|
||||
@@ -22,6 +23,7 @@ load test_helper.sh
|
||||
assert_output --partial "--skip-longctx"
|
||||
assert_output --partial "--tag"
|
||||
assert_output --partial "--kv-types"
|
||||
assert_output --partial "--batch"
|
||||
}
|
||||
|
||||
@test "benchmark dispatcher shows help with no args" {
|
||||
@@ -31,6 +33,18 @@ load test_helper.sh
|
||||
assert_output --partial "--max-size"
|
||||
assert_output --partial "--skip-longctx"
|
||||
assert_output --partial "--kv-types"
|
||||
assert_output --partial "--batch"
|
||||
}
|
||||
|
||||
@test "serve --help shows usage and exits 0" {
|
||||
run bash "$PROJECT_ROOT/bin/serve" --help
|
||||
assert_success
|
||||
assert_output --partial "Usage"
|
||||
assert_output --partial "--model"
|
||||
assert_output --partial "--ngram"
|
||||
assert_output --partial "--no-think"
|
||||
assert_output --partial "--ctx"
|
||||
assert_output --partial "--port"
|
||||
}
|
||||
|
||||
@test "benchmark dispatcher passes --help through to baseline" {
|
||||
|
||||
Reference in New Issue
Block a user