#!/usr/bin/env bash
#
# Compare two benchmark runs side-by-side.
#
# Usage:
#   benchmark compare <before-dir> <after-dir>
#
# Both directories must contain a summary.json. When both also contain a
# system-state.json, the differences in memory sizes, selected kernel
# parameters and the tuned profile are printed before the results table.
#
# Exit status:
#   1  fewer than two arguments were given, or a directory lacks summary.json
#
# NOTE(review): log_header, log_info, log_error and data_dir are not defined
# in this file; presumably they come from the lib/*.sh files sourced below.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../../lib/common.sh"
source "$SCRIPT_DIR/../../lib/format.sh"

#######################################
# Print every sub-directory of a directory, one per line, without the
# trailing slash. Prints " (none)" when there is no sub-directory.
# Arguments: $1 - parent directory to list
# Outputs:   writes the listing to stdout
#######################################
list_runs() {
  local parent=$1
  local entry
  local found=0
  for entry in "$parent"/*/; do
    # An unmatched glob stays literal, so skip anything that is not a directory.
    [[ -d "$entry" ]] || continue
    printf '%s\n' "${entry%/}"
    found=1
  done
  if (( ! found )); then
    echo " (none)"
  fi
}

if [[ $# -lt 2 ]]; then
  echo "Usage: benchmark compare <before-dir> <after-dir>"
  echo ""
  echo "Examples:"
  echo " bin/benchmark compare data/baselines/20260325-120000 data/benchmarks/post-opt-20260326-100000"
  echo ""
  echo "Available baselines:"
  list_runs "$(data_dir baselines)"
  echo ""
  echo "Available benchmark runs:"
  list_runs "$(data_dir benchmarks)"
  exit 1
fi

BEFORE_DIR="$1"
AFTER_DIR="$2"

# Both runs need a summary.json, otherwise there is nothing to compare.
for d in "$BEFORE_DIR" "$AFTER_DIR"; do
  if [[ ! -f "$d/summary.json" ]]; then
    log_error "No summary.json in $d"
    exit 1
  fi
done

log_header "Benchmark Comparison"

# Label each run with the last component of its directory path.
before_name="$(basename "$BEFORE_DIR")"
after_name="$(basename "$AFTER_DIR")"

log_info "Before: $before_name"
log_info "After: $after_name"

# Show system state diff if available.
# The heredoc delimiter is quoted so the shell passes the Python text through
# without expanding anything inside it.
if [[ -f "$BEFORE_DIR/system-state.json" ]] && [[ -f "$AFTER_DIR/system-state.json" ]]; then
  echo ""
  python3 - "$BEFORE_DIR/system-state.json" "$AFTER_DIR/system-state.json" << 'PYEOF'
import sys, json

GIB = 2**30

with open(sys.argv[1]) as f:
    before = json.load(f)
with open(sys.argv[2]) as f:
    after = json.load(f)


def section(state, name):
    """Return a sub-object of the state file, or {} when it is absent or null."""
    return state.get(name) or {}


changes = []

# Memory sizes are stored in bytes; report them in GiB.
b_mem = section(before, "memory")
a_mem = section(after, "memory")
for label, key in (("VRAM", "vram_total_bytes"), ("GTT", "gtt_total_bytes")):
    b_bytes = b_mem.get(key)
    a_bytes = a_mem.get(key)
    if b_bytes != a_bytes:
        # A missing or null size is shown as 0.0 GiB instead of crashing.
        b_gib = (b_bytes or 0) / GIB
        a_gib = (a_bytes or 0) / GIB
        changes.append(f" {label}: {b_gib:.1f} GiB -> {a_gib:.1f} GiB")

b_kern = section(before, "kernel")
a_kern = section(after, "kernel")
for param in ["param_iommu", "param_gttsize", "param_pages_limit"]:
    bv = b_kern.get(param, "")
    av = a_kern.get(param, "")
    if bv != av:
        changes.append(f" {param}: '{bv}' -> '{av}'")

bt = before.get("tuned_profile", "")
at = after.get("tuned_profile", "")
if bt != at:
    changes.append(f" tuned: {bt} -> {at}")

if changes:
    print(" Configuration changes:")
    for c in changes:
        print(c)
else:
    print(" No configuration changes detected")
PYEOF
fi

# Compare results
echo ""
python3 - "$BEFORE_DIR/summary.json" "$AFTER_DIR/summary.json" << 'PYEOF'
import sys, json

GREEN = "\033[32m"
RED = "\033[31m"
RESET = "\033[0m"
DELTA_WIDTH = 8
MISSING = "—"

with open(sys.argv[1]) as f:
    before = json.load(f)
with open(sys.argv[2]) as f:
    after = json.load(f)


def index_results(data):
    """Map (model, backend, test) -> tokens_per_sec for every result entry."""
    idx = {}
    for r in data.get("results", []):
        key = (r["model"], r["backend"], r["test"])
        # A result without a rate is kept as None and rendered as missing.
        idx[key] = r.get("tokens_per_sec")
    return idx


def format_rate(value):
    """Render a tokens/sec value; only None (not 0.0) counts as missing."""
    return MISSING if value is None else f"{value:.1f}"


def format_delta(b_val, a_val):
    """Render the percentage change, padded to DELTA_WIDTH visible columns."""
    # A relative change is undefined without both values or with a zero baseline.
    if b_val is None or a_val is None or b_val == 0:
        return MISSING.rjust(DELTA_WIDTH)
    delta_pct = (a_val - b_val) / b_val * 100
    if delta_pct > 0:
        color, text = GREEN, f"+{delta_pct:.1f}%"
    elif delta_pct < 0:
        color, text = RED, f"{delta_pct:.1f}%"
    else:
        return "0.0%".rjust(DELTA_WIDTH)
    # Pad before colouring: escape sequences count toward a format width but
    # occupy no terminal columns, so padding afterwards misaligns the column.
    return f"{color}{text.rjust(DELTA_WIDTH)}{RESET}"


b_idx = index_results(before)
a_idx = index_results(after)
all_keys = sorted(b_idx.keys() | a_idx.keys())

if not all_keys:
    print(" No comparable results found.")
    sys.exit(0)

# The last column carries no width because it is padded by the caller.
row_fmt = " {:<18} {:<14} {:<7} {:>9} {:>9} {}"
print(row_fmt.format("Model", "Backend", "Test", "Before", "After",
                     "Delta".rjust(DELTA_WIDTH)))
print(" " + "-" * 70)

for key in all_keys:
    model, backend, test = key
    b_val = b_idx.get(key)
    a_val = a_idx.get(key)
    print(row_fmt.format(model[:18], backend[:14], test,
                         format_rate(b_val), format_rate(a_val),
                         format_delta(b_val, a_val)))

print()
PYEOF