#!/usr/bin/env bats # Tests for scripts/benchmark/compare.sh — result comparison logic load test_helper.sh setup() { source_lib common.sh source_lib format.sh BEFORE_DIR="$(mktemp -d)" AFTER_DIR="$(mktemp -d)" } teardown() { rm -rf "$BEFORE_DIR" "$AFTER_DIR" } write_summary() { local dir="$1" json="$2" echo "$json" > "$dir/summary.json" } write_system_state() { local dir="$1" json="$2" echo "$json" > "$dir/system-state.json" } _make_state() { echo '{"memory":{"vram_total_bytes":0},"kernel":{"param_iommu":"","param_gttsize":"","param_pages_limit":""},"tuned_profile":""}'; } # ── Basic ──────────────────────────────────────────────── @test "compare: shows usage when called without args" { run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" assert_failure assert_output --partial "Usage" } @test "compare: fails when summary.json missing in before dir" { write_summary "$AFTER_DIR" '{"results":[]}' run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_failure assert_output --partial "No summary.json" # Should mention the actual directory assert_output --partial "$BEFORE_DIR" } @test "compare: fails when summary.json missing in after dir" { write_summary "$BEFORE_DIR" '{"results":[]}' run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_failure assert_output --partial "No summary.json" assert_output --partial "$AFTER_DIR" } @test "compare: handles empty results gracefully" { write_summary "$BEFORE_DIR" '{"results":[]}' write_summary "$AFTER_DIR" '{"results":[]}' write_system_state "$BEFORE_DIR" "$(_make_state)" write_system_state "$AFTER_DIR" "$(_make_state)" run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_success assert_output --partial "No comparable results" } # ── Delta computation ──────────────────────────────────── @test "compare: shows positive delta for improvement" { write_summary "$BEFORE_DIR" '{"results":[{"model":"qwen3","backend":"Vulkan","test":"pp512","tokens_per_sec":500.0,"file":"t.log","size":"4GB","raw":"500.0"}]}' write_summary "$AFTER_DIR" '{"results":[{"model":"qwen3","backend":"Vulkan","test":"pp512","tokens_per_sec":600.0,"file":"t.log","size":"4GB","raw":"600.0"}]}' write_system_state "$BEFORE_DIR" "$(_make_state)" write_system_state "$AFTER_DIR" "$(_make_state)" run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_success assert_output --partial "500.0" assert_output --partial "600.0" assert_output --partial "+20.0%" } @test "compare: shows negative delta for regression" { write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"tg128","tokens_per_sec":20.0,"file":"f","size":"s","raw":"20.0"}]}' write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"tg128","tokens_per_sec":15.0,"file":"f","size":"s","raw":"15.0"}]}' write_system_state "$BEFORE_DIR" "$(_make_state)" write_system_state "$AFTER_DIR" "$(_make_state)" run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_success assert_output --partial "-25.0%" } @test "compare: shows 0.0% delta when values are identical" { write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":100.0,"file":"f","size":"s","raw":"100.0"}]}' write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":100.0,"file":"f","size":"s","raw":"100.0"}]}' write_system_state "$BEFORE_DIR" "$(_make_state)" write_system_state "$AFTER_DIR" "$(_make_state)" run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_success assert_output --partial "0.0%" } @test "compare: handles 0.0 tokens/sec correctly (not displayed as dash)" { write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":0.0,"file":"f","size":"s","raw":"0.0"}]}' write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":10.0,"file":"f","size":"s","raw":"10.0"}]}' write_system_state "$BEFORE_DIR" "$(_make_state)" write_system_state "$AFTER_DIR" "$(_make_state)" run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_success # 0.0 should appear as a number, not as a dash assert_output --partial "0.0" refute_output --partial "—" } @test "compare: result only in before shows dash for after" { write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"pp512","tokens_per_sec":100.0,"file":"f","size":"s","raw":"100.0"}]}' write_summary "$AFTER_DIR" '{"results":[]}' write_system_state "$BEFORE_DIR" "$(_make_state)" write_system_state "$AFTER_DIR" "$(_make_state)" run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_success assert_output --partial "100.0" } # ── Config change detection ────────────────────────────── @test "compare: detects VRAM change between runs" { write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"t","tokens_per_sec":1.0,"file":"f","size":"s","raw":"1.0"}]}' write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"t","tokens_per_sec":2.0,"file":"f","size":"s","raw":"2.0"}]}' write_system_state "$BEFORE_DIR" '{"memory":{"vram_total_bytes":34359738368},"kernel":{"param_iommu":"","param_gttsize":"","param_pages_limit":""},"tuned_profile":"throughput-performance"}' write_system_state "$AFTER_DIR" '{"memory":{"vram_total_bytes":536870912},"kernel":{"param_iommu":"pt","param_gttsize":"61440","param_pages_limit":"15728640"},"tuned_profile":"accelerator-performance"}' run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_success assert_output --partial "Configuration changes" assert_output --partial "VRAM" assert_output --partial "tuned" } @test "compare: no config changes when states match" { local state='{"memory":{"vram_total_bytes":536870912},"kernel":{"param_iommu":"pt","param_gttsize":"61440","param_pages_limit":"15728640"},"tuned_profile":"accelerator-performance"}' write_summary "$BEFORE_DIR" '{"results":[{"model":"m","backend":"b","test":"t","tokens_per_sec":1.0,"file":"f","size":"s","raw":"1.0"}]}' write_summary "$AFTER_DIR" '{"results":[{"model":"m","backend":"b","test":"t","tokens_per_sec":1.0,"file":"f","size":"s","raw":"1.0"}]}' write_system_state "$BEFORE_DIR" "$state" write_system_state "$AFTER_DIR" "$state" run bash "$PROJECT_ROOT/scripts/benchmark/compare.sh" "$BEFORE_DIR" "$AFTER_DIR" assert_success assert_output --partial "No configuration changes" }