Initial commit

2026-03-25 20:13:15 +01:00
commit c596e38e9e
26 changed files with 2345 additions and 0 deletions
--- a/scripts/benchmark/compare.sh
+++ b/scripts/benchmark/compare.sh
@@ -0,0 +1,140 @@
+#!/usr/bin/env bash
+# Compare two benchmark runs side-by-side
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../../lib/common.sh"
+source "$SCRIPT_DIR/../../lib/format.sh"
+
+if [[ $# -lt 2 ]]; then
+    echo "Usage: benchmark compare <before-dir> <after-dir>"
+    echo ""
+    echo "Examples:"
+    echo "  bin/benchmark compare data/baselines/20260325-120000 data/benchmarks/post-opt-20260326-100000"
+    echo ""
+    echo "Available baselines:"
+    ls -d "$(data_dir baselines)"/*/ 2>/dev/null | sed 's|/$||' || echo "  (none)"
+    echo ""
+    echo "Available benchmark runs:"
+    ls -d "$(data_dir benchmarks)"/*/ 2>/dev/null | sed 's|/$||' || echo "  (none)"
+    exit 1
+fi
+
+BEFORE_DIR="$1"
+AFTER_DIR="$2"
+
+for d in "$BEFORE_DIR" "$AFTER_DIR"; do
+    if [[ ! -f "$d/summary.json" ]]; then
+        log_error "No summary.json in $d"
+        exit 1
+    fi
+done
+
+log_header "Benchmark Comparison"
+
+# Label each run by the last component of its directory path
+before_label="$(basename "$BEFORE_DIR")"
+after_label="$(basename "$AFTER_DIR")"
+log_info "Before: $before_label"
+log_info "After:  $after_label"
+
+# When both runs captured system-state.json, report which settings differ:
+# VRAM/GTT sizes, selected kernel parameters, and the tuned profile.
+if [[ -f "$BEFORE_DIR/system-state.json" && -f "$AFTER_DIR/system-state.json" ]]; then
+    echo ""
+    python3 - "$BEFORE_DIR/system-state.json" "$AFTER_DIR/system-state.json" << 'PYEOF'
+import sys, json
+
+def load(path):
+    with open(path) as fh:
+        return json.load(fh)
+
+before = load(sys.argv[1])
+after = load(sys.argv[2])
+changes = []
+
+# Memory sizes are stored in bytes; they are displayed in GiB.
+b_mem = before.get("memory", {})
+a_mem = after.get("memory", {})
+for label, field in (("  VRAM: ", "vram_total_bytes"),
+                     ("  GTT:  ", "gtt_total_bytes")):
+    if b_mem.get(field) != a_mem.get(field):
+        old_gib = b_mem.get(field, 0) / 2**30
+        new_gib = a_mem.get(field, 0) / 2**30
+        changes.append(f"{label}{old_gib:.1f} GiB -> {new_gib:.1f} GiB")
+
+# Kernel parameters are compared as-is; a missing key reads as "".
+b_kern = before.get("kernel", {})
+a_kern = after.get("kernel", {})
+for param in ("param_iommu", "param_gttsize", "param_pages_limit"):
+    old, new = b_kern.get(param, ""), a_kern.get(param, "")
+    if old != new:
+        changes.append(f"  {param}: '{old}' -> '{new}'")
+
+# The "tuned_profile" entry is compared as a plain value.
+old_profile = before.get("tuned_profile", "")
+new_profile = after.get("tuned_profile", "")
+if old_profile != new_profile:
+    changes.append(f"  tuned: {old_profile} -> {new_profile}")
+
+if not changes:
+    print("  No configuration changes detected")
+else:
+    print("\n".join(["  Configuration changes:", *changes]))
+PYEOF
+fi
+
+# Compare results
+echo ""
+python3 - "$BEFORE_DIR/summary.json" "$AFTER_DIR/summary.json" << 'PYEOF'
+import sys, json
+
+with open(sys.argv[1]) as f:
+    before = json.load(f)
+with open(sys.argv[2]) as f:
+    after = json.load(f)
+
+# Index by (model, backend, test)
+def index_results(data):
+    idx = {}
+    for r in data.get("results", []):
+        key = (r["model"], r["backend"], r["test"])
+        idx[key] = r["tokens_per_sec"]
+    return idx
+
+b_idx = index_results(before)
+a_idx = index_results(after)
+
+all_keys = sorted(set(b_idx.keys()) | set(a_idx.keys()))
+
+if not all_keys:
+    print("  No comparable results found.")
+    sys.exit(0)
+
+fmt = "  {:<18} {:<14} {:<7} {:>9} {:>9} {:>8}"
+print(fmt.format("Model", "Backend", "Test", "Before", "After", "Delta"))
+print("  " + "-" * 70)
+
+for key in all_keys:
+    model, backend, test = key
+    b_val = b_idx.get(key)
+    a_val = a_idx.get(key)
+
+    b_str = f"{b_val:.1f}" if b_val else "—"
+    a_str = f"{a_val:.1f}" if a_val else "—"
+
+    if b_val and a_val:
+        delta_pct = (a_val - b_val) / b_val * 100
+        if delta_pct > 0:
+            d_str = f"\033[32m+{delta_pct:.1f}%\033[0m"
+        elif delta_pct < 0:
+            d_str = f"\033[31m{delta_pct:.1f}%\033[0m"
+        else:
+            d_str = "0.0%"
+    else:
+        d_str = "—"
+
+    print(fmt.format(model[:18], backend[:14], test, b_str, a_str, d_str))
+
+print()
+PYEOF