#!/usr/bin/env python3
"""
Compare hipBLASLt-on vs hblt0-off benchmark runs.

This script inspects docs/results.json (the same dataset consumed by
docs/assets/index2.js) and reports, for every backend that was benchmarked both
with and without the `-hblt0` suffix, which configuration wins.
"""

from __future__ import annotations

import argparse
import json
import math
import statistics
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Tuple

# NOTE: this is a relative path, so it is resolved against the current working
# directory of the process, not against the location of this script.
DEFAULT_RESULTS = Path("../docs") / "results.json"
# Matches the tolerance used in docs/assets/index2.js (MIN_TOL = 0.25)
DEFAULT_TOLERANCE = 0.25
# Suffix on a run's "env" value that marks the hblt0 variant of a backend.
HBLT0_SUFFIX = "-hblt0"

# variant name ('hipblaslt' / 'hblt0') -> list of tokens/sec samples
VariantValues = Dict[str, List[float]]
# measurement key (see measurement_key) -> per-variant samples
BackendMatrix = Dict[Tuple, VariantValues]


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        Namespace with ``results`` (Path) and ``tolerance`` (float).
    """
    parser = argparse.ArgumentParser(
        description=(
            "Pair benchmark runs with and without '-hblt0' and report which "
            "configuration is faster per backend."
        )
    )
    parser.add_argument(
        "--results",
        type=Path,
        default=DEFAULT_RESULTS,
        help="Path to results.json generated by the benchmark pipeline.",
    )
    parser.add_argument(
        "--tolerance",
        type=float,
        default=DEFAULT_TOLERANCE,
        help="Minimum tokens/sec delta to treat as a win (default: 0.25).",
    )
    return parser.parse_args(argv)


def load_runs(path: Path) -> Iterable[dict]:
    """Load the ``runs`` array from the JSON document at *path*.

    Raises:
        ValueError: If the document is not a JSON object or its ``runs``
            member is not a list.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default.
    data = json.loads(path.read_text(encoding="utf-8"))
    runs = data.get("runs") if isinstance(data, dict) else None
    if not isinstance(runs, list):
        raise ValueError(f"results.json at {path} does not contain a 'runs' array")
    return runs


def measurement_key(run: dict) -> Tuple:
    """Return a tuple that uniquely identifies a benchmark scenario.

    The model name is lower-cased and the quantization upper-cased so that
    differences in letter case do not split a scenario; missing ``test``,
    ``context`` and ``context_tokens`` fall back to fixed defaults.
    """
    return (
        (run.get("model_clean") or run.get("model") or "").lower(),
        run.get("test") or "",
        run.get("context") or "default",
        run.get("context_tokens") or 0,
        (run.get("quant") or "").upper(),
        run.get("fa"),
        run.get("rpc"),
        run.get("ngl"),
        run.get("backend"),
    )


def _is_valid_tps(value: object) -> bool:
    """Return True when *value* is a real, finite number (bool excluded)."""
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    try:
        return math.isfinite(value)
    except OverflowError:
        # An int too large to be represented as a float cannot be averaged.
        return False


def pair_runs(runs: Iterable[dict]) -> Tuple[Dict[str, BackendMatrix], Dict[str, Dict[str, int]]]:
    """
    Group runs by backend (without / with '-hblt0') and measurement key.

    Runs are skipped when they are not dicts, have a missing or non-string
    ``env``, carry a truthy ``error``, or have a ``tps_mean`` that is not a
    finite number.

    Returns:
        pairs: backend -> measurement_key -> {'hipblaslt': [...], 'hblt0': [...]}
        coverage: backend -> {'hipblaslt': raw_run_count, 'hblt0': raw_run_count}
    """
    pairs: Dict[str, BackendMatrix] = defaultdict(lambda: defaultdict(dict))
    coverage: Dict[str, Dict[str, int]] = defaultdict(lambda: {"hipblaslt": 0, "hblt0": 0})

    for run in runs:
        if not isinstance(run, dict):
            continue
        env = run.get("env")
        if not env or not isinstance(env, str):
            continue
        if run.get("error"):
            continue
        tps = run.get("tps_mean")
        if not _is_valid_tps(tps):
            continue

        is_hblt0 = env.endswith(HBLT0_SUFFIX)
        # Strip the suffix so both variants land under the same backend name.
        base_env = env[: -len(HBLT0_SUFFIX)] if is_hblt0 else env
        variant = "hblt0" if is_hblt0 else "hipblaslt"

        entry = pairs[base_env][measurement_key(run)]
        entry.setdefault(variant, []).append(float(tps))
        coverage[base_env][variant] += 1

    return pairs, coverage


def summarize_backend(
    backend: str,
    matrix: BackendMatrix,
    tolerance: float,
    coverage: Dict[str, int],
) -> dict | None:
    """Summarize hipBLASLt vs hblt0 results for one backend.

    Only measurement keys that have samples for both variants are compared;
    each variant's samples are averaged before comparison.

    Args:
        backend: Backend name, copied into the summary.
        matrix: measurement_key -> per-variant tokens/sec samples.
        tolerance: Minimum tokens/sec difference that counts as a win.
        coverage: Raw run counts per variant, copied into the summary.

    Returns:
        A summary dict, or ``None`` when no measurement key has both variants.
    """
    pairs: List[Tuple[float, float]] = [
        (statistics.mean(entry["hipblaslt"]), statistics.mean(entry["hblt0"]))
        for entry in matrix.values()
        if "hipblaslt" in entry and "hblt0" in entry
    ]
    if not pairs:
        return None

    hip_wins = sum(1 for hip, hbl in pairs if (hip - hbl) > tolerance)
    hbl_wins = sum(1 for hip, hbl in pairs if (hbl - hip) > tolerance)
    ties = len(pairs) - hip_wins - hbl_wins

    avg_hip = statistics.mean(hip for hip, _ in pairs)
    avg_hbl = statistics.mean(hbl for _, hbl in pairs)
    avg_delta = avg_hip - avg_hbl

    if avg_hbl:
        pct_delta = avg_delta / avg_hbl * 100.0
    elif avg_delta == 0:
        # Both averages are zero: there is no difference to express.
        pct_delta = 0.0
    else:
        # Zero baseline: the ratio is unbounded, keep the sign of the delta.
        pct_delta = math.copysign(float("inf"), avg_delta)

    if avg_delta > tolerance:
        verdict = "hipBLASLt faster"
    elif avg_delta < -tolerance:
        verdict = "hblt0 faster"
    else:
        verdict = "too close to call"

    return {
        "backend": backend,
        "pairs": len(pairs),
        "hip_wins": hip_wins,
        "hbl_wins": hbl_wins,
        "ties": ties,
        "avg_hip": avg_hip,
        "avg_hbl": avg_hbl,
        "avg_delta": avg_delta,
        "pct_delta": pct_delta,
        "verdict": verdict,
        "coverage": coverage,
    }


def format_summary(summary: dict) -> str:
    """Render a summary produced by summarize_backend as one line of text."""
    cov = summary["coverage"]
    hip_runs = cov.get("hipblaslt", 0)
    hbl_runs = cov.get("hblt0", 0)
    return (
        f"{summary['backend']}: {summary['verdict']} "
        f"(Δ {summary['avg_delta']:+.2f} tps / {summary['pct_delta']:+.2f}% "
        f"across {summary['pairs']} matched cases; "
        f"hipBLASLt wins {summary['hip_wins']}, hblt0 wins {summary['hbl_wins']}, "
        f"ties {summary['ties']}; raw runs hipBLASLt={hip_runs}, hblt0={hbl_runs})"
    )


def main(argv: Sequence[str] | None = None) -> None:
    """Load the results file and print one summary line per paired backend.

    Args:
        argv: Optional argument list forwarded to parse_args; ``None`` means
            the process command line.
    """
    args = parse_args(argv)
    runs = load_runs(args.results)
    matrices, coverage = pair_runs(runs)

    summaries = []
    for backend in sorted(matrices):
        summary = summarize_backend(
            backend, matrices[backend], args.tolerance, coverage.get(backend, {})
        )
        if summary:
            summaries.append(summary)

    if not summaries:
        print("No matching hipBLASLt vs hblt0 pairs were found.")
        return

    for summary in summaries:
        print(format_summary(summary))


if __name__ == "__main__":
    main()