refactor: remove hblt0 benchmark support and associated comparison scripts

2026-04-10 11:23:06 +01:00
parent 5acf54cd67
commit c129a04a1c
7 changed files with 47 additions and 327 deletions
@@ -1,189 +0,0 @@
 #!/usr/bin/env python3
 """
 Compare benchmark runs with hipBLASLt enabled against `-hblt0` runs (hipBLASLt disabled).
 This script inspects docs/results.json (the same dataset consumed by
 docs/assets/index2.js) and reports, for every backend that was benchmarked
 both with and without the `-hblt0` suffix, which configuration wins.
 """
 from __future__ import annotations
 import argparse
 import json
 import math
 import statistics
 from collections import defaultdict
 from pathlib import Path
 from typing import Dict, Iterable, List, Tuple
 DEFAULT_RESULTS = Path("../docs") / "results.json"
 # Matches the tolerance used in docs/assets/index2.js (MIN_TOL = 0.25)
 DEFAULT_TOLERANCE = 0.25
 VariantValues = Dict[str, List[float]]
 BackendMatrix = Dict[Tuple, VariantValues]
 def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the comparison script.

    Recognised options:
        --results: path to the results JSON (defaults to DEFAULT_RESULTS).
        --tolerance: float threshold (defaults to DEFAULT_TOLERANCE).
    """
    summary = (
        "Pair benchmark runs with and without '-hblt0' and report which "
        "configuration is faster per backend."
    )
    cli = argparse.ArgumentParser(description=summary)
    # Each option is described declaratively and registered in order.
    option_specs = (
        (
            "--results",
            {
                "type": Path,
                "default": DEFAULT_RESULTS,
                "help": "Path to results.json generated by the benchmark pipeline.",
            },
        ),
        (
            "--tolerance",
            {
                "type": float,
                "default": DEFAULT_TOLERANCE,
                "help": "Minimum tokens/sec delta to treat as a win (default: 0.25).",
            },
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
 def load_runs(path: Path) -> Iterable[dict]:
    """Load the list of benchmark runs stored in a results JSON file.

    Args:
        path: Location of the results file.

    Returns:
        The value stored under the top-level ``"runs"`` key.

    Raises:
        ValueError: If the document is not a JSON object, or its ``"runs"``
            entry is missing or is not a list. Malformed JSON also surfaces
            as ``json.JSONDecodeError``, a ``ValueError`` subclass.
        OSError: If the file cannot be read.
    """
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    data = json.loads(path.read_text(encoding="utf-8"))
    # A top-level array or scalar has no .get(); treat it as the same
    # validation failure instead of leaking an AttributeError.
    runs = data.get("runs") if isinstance(data, dict) else None
    if not isinstance(runs, list):
        raise ValueError(f"results.json at {path} does not contain a 'runs' array")
    return runs
 def measurement_key(run: dict) -> Tuple:
    """Build the hashable identity of the benchmark scenario behind *run*.

    Missing or falsy fields fall back to neutral defaults (empty string,
    ``"default"`` context, ``0`` context tokens). The model name is
    lower-cased and the quantisation label upper-cased; the ``fa``, ``rpc``,
    ``ngl`` and ``backend`` fields are copied through unchanged.
    """
    model_name = run.get("model_clean") or run.get("model") or ""
    quant_label = run.get("quant") or ""
    test_name = run.get("test") or ""
    context_tag = run.get("context") or "default"
    context_tokens = run.get("context_tokens") or 0
    # These fields take part in the key verbatim, including None when absent.
    passthrough = tuple(run.get(field) for field in ("fa", "rpc", "ngl", "backend"))
    normalised = (
        model_name.lower(),
        test_name,
        context_tag,
        context_tokens,
        quant_label.upper(),
    )
    return normalised + passthrough
 def pair_runs(runs: Iterable[dict]) -> Tuple[Dict[str, BackendMatrix], Dict[str, Dict[str, int]]]:
    """
    Group runs by backend (without / with '-hblt0') and measurement key.

    A run is ignored when its ``env`` is missing, empty or not a string,
    when it carries a truthy ``error``, or when ``tps_mean`` is not a finite
    real number (booleans, NaN and infinities are rejected).

    Returns:
        pairs: backend -> measurement_key -> {'hipblaslt': [...], 'hblt0': [...]}
        coverage: backend -> {'hipblaslt': raw_run_count, 'hblt0': raw_run_count}
    """
    hblt0_suffix = "-hblt0"
    pairs: Dict[str, BackendMatrix] = defaultdict(lambda: defaultdict(dict))
    coverage: Dict[str, Dict[str, int]] = defaultdict(lambda: {"hipblaslt": 0, "hblt0": 0})
    for run in runs:
        env = run.get("env")
        # A non-string env cannot carry the suffix and would crash endswith().
        if not isinstance(env, str) or not env:
            continue
        if run.get("error"):
            continue
        tps = run.get("tps_mean")
        # bool is an int subclass, so exclude it explicitly; a single
        # Infinity (accepted by json.loads) would poison every mean.
        if isinstance(tps, bool) or not isinstance(tps, (int, float)):
            continue
        if not math.isfinite(tps):
            continue
        is_hblt0 = env.endswith(hblt0_suffix)
        # Strip the suffix by its real length rather than a magic number.
        base_env = env[: -len(hblt0_suffix)] if is_hblt0 else env
        variant = "hblt0" if is_hblt0 else "hipblaslt"
        key = measurement_key(run)
        entry = pairs[base_env][key]
        entry.setdefault(variant, []).append(float(tps))
        coverage[base_env][variant] += 1
    return pairs, coverage
 def summarize_backend(
    backend: str,
    matrix: BackendMatrix,
    tolerance: float,
    coverage: Dict[str, int],
 ) -> dict | None:
    """Summarise how hipBLASLt compares with hblt0 for one backend.

    Args:
        backend: Name stored in the summary under ``"backend"``.
        matrix: measurement key -> {'hipblaslt': [...], 'hblt0': [...]}.
        tolerance: Minimum absolute difference that counts as a win, both
            per matched case and for the overall verdict.
        coverage: Stored unchanged in the summary under ``"coverage"``.

    Returns:
        A summary dict, or ``None`` when no measurement key has samples for
        both variants.
    """
    pairs: List[Tuple[float, float]] = []
    for entry in matrix.values():
        hip_samples = entry.get("hipblaslt")
        hbl_samples = entry.get("hblt0")
        # A missing or empty sample list means the case is unpaired;
        # statistics.mean would raise on an empty list.
        if not hip_samples or not hbl_samples:
            continue
        pairs.append((statistics.mean(hip_samples), statistics.mean(hbl_samples)))
    if not pairs:
        return None
    hip_wins = sum(1 for hip, hbl in pairs if (hip - hbl) > tolerance)
    hbl_wins = sum(1 for hip, hbl in pairs if (hbl - hip) > tolerance)
    ties = len(pairs) - hip_wins - hbl_wins
    avg_hip = statistics.mean(hip for hip, _ in pairs)
    avg_hbl = statistics.mean(hbl for _, hbl in pairs)
    avg_delta = avg_hip - avg_hbl
    if avg_hbl:
        pct_delta = avg_delta / avg_hbl * 100.0
    elif avg_delta:
        # A relative change against a zero baseline is unbounded.
        pct_delta = float("inf")
    else:
        # Both averages are zero: there is no change, not an infinite one.
        pct_delta = 0.0
    if avg_delta > tolerance:
        verdict = "hipBLASLt faster"
    elif avg_delta < -tolerance:
        verdict = "hblt0 faster"
    else:
        verdict = "too close to call"
    return {
        "backend": backend,
        "pairs": len(pairs),
        "hip_wins": hip_wins,
        "hbl_wins": hbl_wins,
        "ties": ties,
        "avg_hip": avg_hip,
        "avg_hbl": avg_hbl,
        "avg_delta": avg_delta,
        "pct_delta": pct_delta,
        "verdict": verdict,
        "coverage": coverage,
    }
 def format_summary(summary: dict) -> str:
    """Render one backend summary as a single human-readable line.

    Raw run counts are read from ``summary["coverage"]`` and default to 0
    for a variant that is absent from it.
    """
    run_counts = summary["coverage"]
    headline = f"{summary['backend']}: {summary['verdict']}"
    delta = f"Δ {summary['avg_delta']:+.2f} tps / {summary['pct_delta']:+.2f}%"
    scope = f"across {summary['pairs']} matched cases"
    tally = (
        f"hipBLASLt wins {summary['hip_wins']}, hblt0 wins {summary['hbl_wins']}, "
        f"ties {summary['ties']}"
    )
    raw = (
        f"raw runs hipBLASLt={run_counts.get('hipblaslt', 0)}, "
        f"hblt0={run_counts.get('hblt0', 0)}"
    )
    return f"{headline} ({delta} {scope}; {tally}; {raw})"
 def main() -> None:
    """Load the results file, pair the runs and print one line per backend.

    Backends are reported in sorted name order; backends for which
    summarize_backend returns nothing are left out. A single notice is
    printed when no backend has any matched pair.
    """
    options = parse_args()
    matrices, coverage = pair_runs(load_runs(options.results))
    candidates = (
        summarize_backend(name, matrices[name], options.tolerance, coverage.get(name, {}))
        for name in sorted(matrices)
    )
    # Materialise every summary before printing anything, as before.
    reports = [report for report in candidates if report]
    if reports:
        for report in reports:
            print(format_summary(report))
    else:
        print("No matching hipBLASLt vs hblt0 pairs were found.")
 if __name__ == "__main__":
    main()
@@ -65,7 +65,7 @@ def canonicalize_env(env):
 def parse_env_flags(basename):
    """
-    pattern: <model>__<env>[__fa1][__hblt0][__longctx32768][__rpc]
+    pattern: <model>__<env>[__fa1][__longctx32768][__rpc]
    Returns (env, fa, context_tag, context_tokens, rpc_flag)
    """
    parts = basename.split("__")
@@ -82,8 +82,7 @@ def parse_env_flags(basename):
        suffix = raw_suffix.lower()
        if suffix == "fa1":
            fa = True
-        elif suffix == "hblt0":
+
            env = f"{env}-hblt0"
        elif suffix.startswith("longctx"):
            context_tag = suffix
            m = LONGCTX_RE.search(suffix)
@@ -68,41 +68,18 @@ declare -A CMDS=(
  [vulkan_radv]="toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench"
 )
 # Print, one per line, the hipBLASLt modes to benchmark for the environment
 # named in $1: "default" and "off" when the name starts with "rocm",
 # otherwise only "default".
 get_hblt_modes() {
  local env="$1"
  if [[ "$env" == rocm* ]]; then
    printf '%s\n' default off
  else
    printf '%s\n' default
  fi
 }
 for MODEL_PATH in "${MODEL_PATHS[@]}"; do
  MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"
  for ENV in "${!CMDS[@]}"; do
-    CMD="${CMDS[$ENV]}"
+    CMD_EFFECTIVE="${CMDS[$ENV]}"
    mapfile -t HBLT_MODES < <(get_hblt_modes "$ENV")
    for MODE in "${HBLT_MODES[@]}"; do
      BASE_SUFFIX=""
      CMD_EFFECTIVE="$CMD"
      if [[ "$ENV" == rocm* ]]; then
        if [[ "$MODE" == off ]]; then
          BASE_SUFFIX="__hblt0"
          CMD_EFFECTIVE="${CMD_EFFECTIVE/-- /-- env ROCBLAS_USE_HIPBLASLT=0 }"
        else
          CMD_EFFECTIVE="${CMD_EFFECTIVE/-- /-- env ROCBLAS_USE_HIPBLASLT=1 }"
        fi
      fi
    # run once, with flash attention enabled (FA=1 is the only value iterated)
    for FA in 1; do
-        SUFFIX="$BASE_SUFFIX"
+      SUFFIX=""
      EXTRA_ARGS=()
      if (( FA == 1 )); then
-          SUFFIX="${SUFFIX}__fa1"
+        SUFFIX="__fa1"
        EXTRA_ARGS=( -fa 1 )
      fi
@@ -145,4 +122,3 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
    done
  done
 done
 done
@@ -84,14 +84,6 @@ resolve_model_path() {
  return 1
 }
 # Print, one per line, the hipBLASLt modes to benchmark for the environment
 # named in $1: "default" and "off" when the name starts with "rocm",
 # otherwise only "default".
 get_hblt_modes() {
  local env="$1"
  if [[ "$env" == rocm* ]]; then
    printf '%s\n' default off
  else
    printf '%s\n' default
  fi
 }
 ensure_models_exist() {
  RESOLVED_MODELS=()
@@ -141,23 +133,13 @@ has_pending_runs() {
 start_remote_rpc() {
  local env="$1"
  local image="$2"
-  local mode="$3"
+  local suffix="$3"
  local suffix="$4"
  local remote_log="/tmp/rpc-server-${env}${suffix}.log"
  local env_prefix=""
  if [[ "$env" == rocm* ]]; then
    if [[ "$mode" == off ]]; then
      env_prefix="env ROCBLAS_USE_HIPBLASLT=0 "
    else
      env_prefix="env ROCBLAS_USE_HIPBLASLT=1 "
    fi
  fi
  ssh -p "$REMOTE_PORT" "$REMOTE_TARGET" 'bash -s' <<EOF
 set -euo pipefail
 pkill -9 -f rpc-server || true
-nohup toolbox run -c ${image} -- ${env_prefix}rpc-server -H 0.0.0.0 -p ${RPC_PORT} -c >${remote_log} 2>&1 < /dev/null &
+nohup toolbox run -c ${image} -- rpc-server -H 0.0.0.0 -p ${RPC_PORT} -c >${remote_log} 2>&1 < /dev/null &
 echo \$!
 EOF
 }
@@ -201,7 +183,6 @@ run_llama_bench_rpc() {
  local model_path="$1"
  local env="$2"
  local suffix="$3"
  local mode="$4"
  local model_name
  model_name="$(basename "${model_path}" .gguf)"
  local client_cmd="${CLIENT_CMDS[$env]:-}"
@@ -216,14 +197,6 @@ run_llama_bench_rpc() {
    return
  fi
  if [[ "$env" == rocm* ]]; then
    if [[ "$mode" == off ]]; then
      client_cmd="${client_cmd/-- /-- env ROCBLAS_USE_HIPBLASLT=0 }"
    else
      client_cmd="${client_cmd/-- /-- env ROCBLAS_USE_HIPBLASLT=1 }"
    fi
  fi
  local -a client_cmd_ary
  # shellcheck disable=SC2206 # intentional word splitting
  client_cmd_ary=( $client_cmd )
@@ -284,13 +257,7 @@ run_all() {
      continue
    fi
    mapfile -t hblt_modes < <(get_hblt_modes "$env")
    for mode in "${hblt_modes[@]}"; do
      local suffix=""
      if [[ "$mode" == off ]]; then
        suffix="__hblt0"
      fi
      echo
      echo "==== ${env}${suffix} -> ${image} ===="
@@ -302,7 +269,7 @@ run_all() {
      CURRENT_REMOTE_ENV="${env}${suffix}"
      local remote_pid
-      remote_pid="$(start_remote_rpc "$env" "$image" "$mode" "$suffix" | tr -d '\r')"
+      remote_pid="$(start_remote_rpc "$env" "$image" "$suffix" | tr -d '\r')"
      if [[ -z "$remote_pid" ]]; then
        echo "[ERROR] Failed to start RPC server for ${env}${suffix}"
@@ -322,14 +289,13 @@ run_all() {
      fi
      for model in "${RESOLVED_MODELS[@]}"; do
-        run_llama_bench_rpc "$model" "$env" "$suffix" "$mode"
+        run_llama_bench_rpc "$model" "$env" "$suffix"
      done
      stop_remote_rpc "$env" "$remote_pid" || true
      CURRENT_REMOTE_PID=""
      CURRENT_REMOTE_ENV=""
  done
  done
 }
 run_all
@@ -165,10 +165,7 @@ select {
    transform: translateY(-2px);
 }
-.backend-item .tag.tag-hblt0 {
+
    background: #e9edff;
    color: #1d3ea5;
 }
 .backend-item .tag.tag-rocwmma {
    background: #eef9ff;
@@ -55,9 +55,7 @@ function cacheUI() {
        stats: document.getElementById("stats-line"),
        resetBtn: document.getElementById("reset-layout"),
        tables: document.getElementById("tables"),
-        hipblasModalOpen: document.getElementById("hipblas-modal-open"),
+
        hipblasModal: document.getElementById("hipblas-modal"),
        hipblasModalClose: document.getElementById("hipblas-modal-close"),
        rpcModalOpen: document.getElementById("rpc-modal-open"),
        rpcModal: document.getElementById("rpc-modal"),
        rpcModalClose: document.getElementById("rpc-modal-close"),
@@ -72,11 +70,6 @@ function cacheUI() {
 function setupModals() {
    const modalConfigs = [
        {
            open: state.ui.hipblasModalOpen,
            modal: state.ui.hipblasModal,
            close: state.ui.hipblasModalClose,
        },
        {
            open: state.ui.rpcModalOpen,
            modal: state.ui.rpcModal,
@@ -668,7 +661,7 @@ function backendValue(entry, direction) {
 function splitEnvName(env) {
    const canonical = env.replace(/_/g, ".");
-    const tagRegex = /-(rocwmma-improved|rocwmma|improved|hblt0)/gi;
+    const tagRegex = /-(rocwmma-improved|rocwmma|improved)/gi;
    const tags = [];
    let match;
    while ((match = tagRegex.exec(canonical)) !== null) {
@@ -19,9 +19,7 @@
        <div class="legend">
            <label>Legend</label>
            <div class="legend-pills">
-                <button id="hipblas-modal-open" type="button" class="chip small legend-pill legend-pill-default">
+
                    hipBLASLt vs hblt0
                </button>
                <button id="rpc-modal-open" type="button" class="chip small legend-pill legend-pill-rpc">
                    RPC · dual server
                </button>
@@ -83,26 +81,6 @@
        <div id="tables"></div>
    </section>
    <div id="hipblas-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="hipblas-title">
        <div class="modal-content">
            <button id="hipblas-modal-close" class="modal-close" aria-label="Close dialog">×</button>
            <h2 id="hipblas-title">hipBLASLt &amp; hblt0 explained</h2>
            <p>The ROCm toolboxes ship with <code>ROCBLAS_USE_HIPBLASLT=1</code> by default. This forces rocBLAS to
                prefer
                the hipBLASLt kernel library, which historically delivered the best throughput on gfx1151 (Strix Halo).
            </p>
            <p>Rows tagged with <code>__hblt0</code> were re-run with <code>ROCBLAS_USE_HIPBLASLT=0</code>, letting
                rocBLAS
                auto-select between hipBLASLt, Tensile, or other kernel providers. These runs show how performance
                shifts when
                the tuned hipBLASLt path is disabled.</p>
            <p>hipBLASLt is AMD's LT (low-level tuned) matmul backend, optimized for transformer workloads. Disabling it
                can
                expose regressions or improvements depending on driver versions, so both configurations are published
                for
                comparison.</p>
        </div>
    </div>
    <div id="rpc-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rpc-title">
        <div class="modal-content">