Bugs fixed in production code:
- compare.sh: Python truthiness on 0.0 — `if b_val` was False for 0.0 t/s, displaying it as a dash instead of "0.0". Fixed with `is not None` checks.
- compare.sh: ZeroDivisionError when computing delta % with a 0.0 baseline.

Test improvements (review findings):
- detect.bats: kernel param tests now use the real detect_kernel_param logic pattern (not a separate reimplementation). Added non-GiB-aligned RAM test, device ID without 0x prefix, empty firmware version, llama-bench detection, detect_total_physical_ram_kb tests.
- benchmark_compare.bats: assert delta percentages (+20.0%, -25.0%, 0.0%), test 0.0 t/s edge case, test per-directory error messages, test config change detection with specific field assertions.
- log_metrics.bats: add assert_success, --help test, timestamp format validation. Remove unused mock sysfs setup.
- common.bats: fix data_dir test, remove redundant assertion, add cleanup.
- test_helper.sh: remove unused FIXTURES_DIR.
- Remove empty tests/fixtures/ directory.

94 tests, all passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
152 lines
7.0 KiB
Bash
152 lines
7.0 KiB
Bash
#!/usr/bin/env bats
|
|
# Tests for scripts/benchmark/compare.sh — result comparison logic
|
|
|
|
load test_helper.sh
|
|
|
|
# Per-test fixture (bats runs setup before every @test).
# Loads the shared libraries through source_lib (not defined in this file)
# and creates the two scratch directories the tests populate.
# Globals: BEFORE_DIR (written), AFTER_DIR (written)
setup() {
  source_lib common.sh
  source_lib format.sh

  # Fresh throwaway directories for the "before" and "after" benchmark runs.
  BEFORE_DIR="$(mktemp -d)"
  AFTER_DIR="$(mktemp -d)"
}
|
|
|
|
# Per-test cleanup (bats runs teardown after every @test, even when setup
# aborted early): removes the scratch directories created by setup.
# Globals: BEFORE_DIR (read), AFTER_DIR (read)
teardown() {
  local scratch_dir
  # ${VAR:-} keeps this safe when setup failed before assigning the variable
  # (including under `set -u`); empty entries are skipped instead of being
  # handed to rm.
  for scratch_dir in "${BEFORE_DIR:-}" "${AFTER_DIR:-}"; do
    [[ -n "$scratch_dir" ]] || continue
    # `--` stops option parsing so an odd path can never be read as a flag.
    rm -rf -- "$scratch_dir"
  done
}
|
|
|
|
# Write a benchmark summary fixture.
# Arguments: $1 - target directory
#            $2 - JSON text to store
# Outputs:   creates/overwrites "$1/summary.json" with $2 plus a newline
write_summary() {
  local dir="$1" json="$2"
  # printf instead of echo: echo would treat payloads such as "-n" or "-e"
  # as options and write nothing.
  printf '%s\n' "$json" > "$dir/summary.json"
}
|
|
|
|
# Write a system-state fixture.
# Arguments: $1 - target directory
#            $2 - JSON text to store
# Outputs:   creates/overwrites "$1/system-state.json" with $2 plus a newline
write_system_state() {
  local dir="$1" json="$2"
  # printf instead of echo: echo would treat payloads such as "-n" or "-e"
  # as options and write nothing.
  printf '%s\n' "$json" > "$dir/system-state.json"
}
|
|
|
|
# Print a neutral system-state JSON document on stdout:
# zero VRAM, empty kernel parameters and an empty tuned profile.
_make_state() {
  printf '%s\n' '{"memory":{"vram_total_bytes":0},"kernel":{"param_iommu":"","param_gttsize":"","param_pages_limit":""},"tuned_profile":""}'
}
|
|
|
|
# ── Basic ────────────────────────────────────────────────
|
|
|
|
@test "compare: shows usage when called without args" {
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh"
|
|
assert_failure
|
|
assert_output --partial "Usage"
|
|
}
|
|
|
|
@test "compare: fails when summary.json missing in before dir" {
|
|
write_summary "$AFTER_DIR" '{"results":[]}'
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_failure
|
|
assert_output --partial "No summary.json"
|
|
# Should mention the actual directory
|
|
assert_output --partial "$BEFORE_DIR"
|
|
}
|
|
|
|
@test "compare: fails when summary.json missing in after dir" {
|
|
write_summary "$BEFORE_DIR" '{"results":[]}'
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_failure
|
|
assert_output --partial "No summary.json"
|
|
assert_output --partial "$AFTER_DIR"
|
|
}
|
|
|
|
@test "compare: handles empty results gracefully" {
|
|
write_summary "$BEFORE_DIR" '{"results":[]}'
|
|
write_summary "$AFTER_DIR" '{"results":[]}'
|
|
write_system_state "$BEFORE_DIR" "$(_make_state)"
|
|
write_system_state "$AFTER_DIR" "$(_make_state)"
|
|
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_success
|
|
assert_output --partial "No comparable results"
|
|
}
|
|
|
|
# ── Delta computation ────────────────────────────────────
|
|
|
|
@test "compare: shows positive delta for improvement" {
|
|
write_summary "$BEFORE_DIR" '{"results":[{"model":"qwen3","backend":"Vulkan","test":"pp512","tokens_per_sec":500.0,"file":"t.log","size":"4GB","raw":"500.0"}]}'
|
|
write_summary "$AFTER_DIR" '{"results":[{"model":"qwen3","backend":"Vulkan","test":"pp512","tokens_per_sec":600.0,"file":"t.log","size":"4GB","raw":"600.0"}]}'
|
|
write_system_state "$BEFORE_DIR" "$(_make_state)"
|
|
write_system_state "$AFTER_DIR" "$(_make_state)"
|
|
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_success
|
|
assert_output --partial "500.0"
|
|
assert_output --partial "600.0"
|
|
assert_output --partial "+20.0%"
|
|
}
|
|
|
|
@test "compare: shows negative delta for regression" {
|
|
write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"tg128","tokens_per_sec":20.0,"file":"f","size":"s","raw":"20.0"}]}'
|
|
write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"tg128","tokens_per_sec":15.0,"file":"f","size":"s","raw":"15.0"}]}'
|
|
write_system_state "$BEFORE_DIR" "$(_make_state)"
|
|
write_system_state "$AFTER_DIR" "$(_make_state)"
|
|
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_success
|
|
assert_output --partial "-25.0%"
|
|
}
|
|
|
|
@test "compare: shows 0.0% delta when values are identical" {
|
|
write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":100.0,"file":"f","size":"s","raw":"100.0"}]}'
|
|
write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":100.0,"file":"f","size":"s","raw":"100.0"}]}'
|
|
write_system_state "$BEFORE_DIR" "$(_make_state)"
|
|
write_system_state "$AFTER_DIR" "$(_make_state)"
|
|
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_success
|
|
assert_output --partial "0.0%"
|
|
}
|
|
|
|
@test "compare: handles 0.0 tokens/sec correctly (not displayed as dash)" {
|
|
write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":0.0,"file":"f","size":"s","raw":"0.0"}]}'
|
|
write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":10.0,"file":"f","size":"s","raw":"10.0"}]}'
|
|
write_system_state "$BEFORE_DIR" "$(_make_state)"
|
|
write_system_state "$AFTER_DIR" "$(_make_state)"
|
|
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_success
|
|
# 0.0 should appear as a number, not as a dash
|
|
assert_output --partial "0.0"
|
|
refute_output --partial "—"
|
|
}
|
|
|
|
@test "compare: result only in before shows dash for after" {
|
|
write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":100.0,"file":"f","size":"s","raw":"100.0"}]}'
|
|
write_summary "$AFTER_DIR" '{"results":[]}'
|
|
write_system_state "$BEFORE_DIR" "$(_make_state)"
|
|
write_system_state "$AFTER_DIR" "$(_make_state)"
|
|
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_success
|
|
assert_output --partial "100.0"
|
|
}
|
|
|
|
# ── Config change detection ──────────────────────────────
|
|
|
|
@test "compare: detects VRAM change between runs" {
|
|
write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"t","tokens_per_sec":1.0,"file":"f","size":"s","raw":"1.0"}]}'
|
|
write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"t","tokens_per_sec":2.0,"file":"f","size":"s","raw":"2.0"}]}'
|
|
write_system_state "$BEFORE_DIR" '{"memory":{"vram_total_bytes":34359738368},"kernel":{"param_iommu":"","param_gttsize":"","param_pages_limit":""},"tuned_profile":"throughput-performance"}'
|
|
write_system_state "$AFTER_DIR" '{"memory":{"vram_total_bytes":536870912},"kernel":{"param_iommu":"pt","param_gttsize":"61440","param_pages_limit":"15728640"},"tuned_profile":"accelerator-performance"}'
|
|
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_success
|
|
assert_output --partial "Configuration changes"
|
|
assert_output --partial "VRAM"
|
|
assert_output --partial "tuned"
|
|
}
|
|
|
|
@test "compare: no config changes when states match" {
|
|
local state='{"memory":{"vram_total_bytes":536870912},"kernel":{"param_iommu":"pt","param_gttsize":"61440","param_pages_limit":"15728640"},"tuned_profile":"accelerator-performance"}'
|
|
write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"t","tokens_per_sec":1.0,"file":"f","size":"s","raw":"1.0"}]}'
|
|
write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"t","tokens_per_sec":1.0,"file":"f","size":"s","raw":"1.0"}]}'
|
|
write_system_state "$BEFORE_DIR" "$state"
|
|
write_system_state "$AFTER_DIR" "$state"
|
|
|
|
run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR"
|
|
assert_success
|
|
assert_output --partial "No configuration changes"
|
|
}
|