From c129a04a1c22ea4c891a25af149a95e78758b941 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Fri, 10 Apr 2026 11:23:06 +0100 Subject: [PATCH] refactor: remove hblt0 benchmark support and associated comparison scripts --- benchmark/compare_hblt0.py | 189 ----------------------------- benchmark/generate_results_json.py | 5 +- benchmark/run_benchmarks.sh | 98 ++++++--------- benchmark/run_rpc_benchmarks.sh | 42 +------ docs/assets/index2.css | 5 +- docs/assets/index2.js | 11 +- docs/index.html | 24 +--- 7 files changed, 47 insertions(+), 327 deletions(-) delete mode 100644 benchmark/compare_hblt0.py diff --git a/benchmark/compare_hblt0.py b/benchmark/compare_hblt0.py deleted file mode 100644 index 68bfd3e..0000000 --- a/benchmark/compare_hblt0.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python3 -""" -Compare hipBLASLt-on vs hblt0-off benchmark runs. - -This script inspects docs/results.json (the same dataset consumed by -docs/assets/index2.js) and reports, for every backend that was benchmarked -both with and without the `-hblt0` suffix, which configuration wins. -""" - -from __future__ import annotations - -import argparse -import json -import math -import statistics -from collections import defaultdict -from pathlib import Path -from typing import Dict, Iterable, List, Tuple - - -DEFAULT_RESULTS = Path("../docs") / "results.json" -# Matches the tolerance used in docs/assets/index2.js (MIN_TOL = 0.25) -DEFAULT_TOLERANCE = 0.25 - -VariantValues = Dict[str, List[float]] -BackendMatrix = Dict[Tuple, VariantValues] - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description=( - "Pair benchmark runs with and without '-hblt0' and report which " - "configuration is faster per backend." - ) - ) - parser.add_argument( - "--results", - type=Path, - default=DEFAULT_RESULTS, - help="Path to results.json generated by the benchmark pipeline.", - ) - parser.add_argument( - "--tolerance", - type=float, - default=DEFAULT_TOLERANCE, - help="Minimum tokens/sec delta to treat as a win (default: 0.25).", - ) - return parser.parse_args() - - -def load_runs(path: Path) -> Iterable[dict]: - data = json.loads(path.read_text()) - runs = data.get("runs") - if not isinstance(runs, list): - raise ValueError(f"results.json at {path} does not contain a 'runs' array") - return runs - - -def measurement_key(run: dict) -> Tuple: - """Return a tuple that uniquely identifies a benchmark scenario.""" - return ( - (run.get("model_clean") or run.get("model") or "").lower(), - run.get("test") or "", - run.get("context") or "default", - run.get("context_tokens") or 0, - (run.get("quant") or "").upper(), - run.get("fa"), - run.get("rpc"), - run.get("ngl"), - run.get("backend"), - ) - - -def pair_runs(runs: Iterable[dict]) -> Tuple[Dict[str, BackendMatrix], Dict[str, Dict[str, int]]]: - """ - Group runs by backend (without / with '-hblt0') and measurement key. - - Returns: - pairs: backend -> measurement_key -> {'hipblaslt': [...], 'hblt0': [...]} - coverage: backend -> {'hipblaslt': raw_run_count, 'hblt0': raw_run_count} - """ - pairs: Dict[str, BackendMatrix] = defaultdict(lambda: defaultdict(dict)) - coverage: Dict[str, Dict[str, int]] = defaultdict(lambda: {"hipblaslt": 0, "hblt0": 0}) - - for run in runs: - env = run.get("env") - if not env: - continue - if run.get("error"): - continue - tps = run.get("tps_mean") - if not isinstance(tps, (int, float)) or math.isnan(tps): - continue - - is_hblt0 = env.endswith("-hblt0") - base_env = env[:-6] if is_hblt0 else env - variant = "hblt0" if is_hblt0 else "hipblaslt" - - key = measurement_key(run) - entry = pairs[base_env][key] - entry.setdefault(variant, []).append(float(tps)) - coverage[base_env][variant] += 1 - - return pairs, coverage - - -def summarize_backend( - backend: str, - matrix: BackendMatrix, - tolerance: float, - coverage: Dict[str, int], -) -> dict | None: - pairs: List[Tuple[float, float]] = [] - - for entry in matrix.values(): - if "hipblaslt" not in entry or "hblt0" not in entry: - continue - hip = statistics.mean(entry["hipblaslt"]) - hbl = statistics.mean(entry["hblt0"]) - pairs.append((hip, hbl)) - - if not pairs: - return None - - hip_wins = sum(1 for hip, hbl in pairs if (hip - hbl) > tolerance) - hbl_wins = sum(1 for hip, hbl in pairs if (hbl - hip) > tolerance) - ties = len(pairs) - hip_wins - hbl_wins - - avg_hip = statistics.mean(hip for hip, _ in pairs) - avg_hbl = statistics.mean(hbl for _, hbl in pairs) - avg_delta = avg_hip - avg_hbl - pct_delta = (avg_delta / avg_hbl * 100.0) if avg_hbl else float("inf") - - if avg_delta > tolerance: - verdict = "hipBLASLt faster" - elif avg_delta < -tolerance: - verdict = "hblt0 faster" - else: - verdict = "too close to call" - - return { - "backend": backend, - "pairs": len(pairs), - "hip_wins": hip_wins, - "hbl_wins": hbl_wins, - "ties": ties, - "avg_hip": avg_hip, - "avg_hbl": avg_hbl, - "avg_delta": avg_delta, - "pct_delta": pct_delta, - "verdict": verdict, - "coverage": coverage, - } - - -def format_summary(summary: dict) -> str: - cov = summary["coverage"] - hip_runs = cov.get("hipblaslt", 0) - hbl_runs = cov.get("hblt0", 0) - return ( - f"{summary['backend']}: {summary['verdict']} " - f"(Δ {summary['avg_delta']:+.2f} tps / {summary['pct_delta']:+.2f}% " - f"across {summary['pairs']} matched cases; " - f"hipBLASLt wins {summary['hip_wins']}, hblt0 wins {summary['hbl_wins']}, " - f"ties {summary['ties']}; raw runs hipBLASLt={hip_runs}, hblt0={hbl_runs})" - ) - - -def main() -> None: - args = parse_args() - runs = load_runs(args.results) - matrices, coverage = pair_runs(runs) - - summaries = [] - for backend in sorted(matrices): - summary = summarize_backend(backend, matrices[backend], args.tolerance, coverage.get(backend, {})) - if summary: - summaries.append(summary) - - if not summaries: - print("No matching hipBLASLt vs hblt0 pairs were found.") - return - - for summary in summaries: - print(format_summary(summary)) - - -if __name__ == "__main__": - main() diff --git a/benchmark/generate_results_json.py b/benchmark/generate_results_json.py index 7ff7954..f7497ec 100644 --- a/benchmark/generate_results_json.py +++ b/benchmark/generate_results_json.py @@ -65,7 +65,7 @@ def canonicalize_env(env): def parse_env_flags(basename): """ - pattern: __[__fa1][__hblt0][__longctx32768][__rpc] + pattern: __[__fa1][__longctx32768][__rpc] Returns (env, fa, context_tag, context_tokens, rpc_flag) """ parts = basename.split("__") @@ -82,8 +82,7 @@ def parse_env_flags(basename): suffix = raw_suffix.lower() if suffix == "fa1": fa = True - elif suffix == "hblt0": - env = f"{env}-hblt0" + elif suffix.startswith("longctx"): context_tag = suffix m = LONGCTX_RE.search(suffix) diff --git a/benchmark/run_benchmarks.sh b/benchmark/run_benchmarks.sh index 71ede28..b7eb91a 100755 --- a/benchmark/run_benchmarks.sh +++ b/benchmark/run_benchmarks.sh @@ -68,80 +68,56 @@ declare -A CMDS=( [vulkan_radv]="toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench" ) -get_hblt_modes() { - local env="$1" - if [[ "$env" == rocm* ]]; then - printf '%s\n' default off - else - printf '%s\n' default - fi -} - for MODEL_PATH in "${MODEL_PATHS[@]}"; do MODEL_NAME="$(basename "$MODEL_PATH" .gguf)" for ENV in "${!CMDS[@]}"; do - CMD="${CMDS[$ENV]}" - mapfile -t HBLT_MODES < <(get_hblt_modes "$ENV") + CMD_EFFECTIVE="${CMDS[$ENV]}" - for MODE in "${HBLT_MODES[@]}"; do - BASE_SUFFIX="" - CMD_EFFECTIVE="$CMD" - - if [[ "$ENV" == rocm* ]]; then - if [[ "$MODE" == off ]]; then - BASE_SUFFIX="__hblt0" - CMD_EFFECTIVE="${CMD_EFFECTIVE/-- /-- env ROCBLAS_USE_HIPBLASLT=0 }" - else - CMD_EFFECTIVE="${CMD_EFFECTIVE/-- /-- env ROCBLAS_USE_HIPBLASLT=1 }" - fi + # run twice: baseline and with flash attention + for FA in 1; do + SUFFIX="" + EXTRA_ARGS=() + if (( FA == 1 )); then + SUFFIX="__fa1" + EXTRA_ARGS=( -fa 1 ) fi - # run twice: baseline and with flash attention - for FA in 1; do - SUFFIX="$BASE_SUFFIX" - EXTRA_ARGS=() - if (( FA == 1 )); then - SUFFIX="${SUFFIX}__fa1" - EXTRA_ARGS=( -fa 1 ) + for CTX in default longctx32768; do + CTX_SUFFIX="" + CTX_ARGS=() + if [[ "$CTX" == longctx32768 ]]; then + CTX_SUFFIX="__longctx32768" + CTX_ARGS=( -p 2048 -n 32 -d 32768 ) + if [[ "$ENV" == *vulkan* ]]; then + CTX_ARGS+=( -ub 512 ) + else + CTX_ARGS+=( -ub 2048 ) + fi fi - for CTX in default longctx32768; do - CTX_SUFFIX="" - CTX_ARGS=() - if [[ "$CTX" == longctx32768 ]]; then - CTX_SUFFIX="__longctx32768" - CTX_ARGS=( -p 2048 -n 32 -d 32768 ) - if [[ "$ENV" == *vulkan* ]]; then - CTX_ARGS+=( -ub 512 ) - else - CTX_ARGS+=( -ub 2048 ) - fi - fi + OUT="$RESULTDIR/${MODEL_NAME}__${ENV}${SUFFIX}${CTX_SUFFIX}.log" + CTX_REPS=5 + if [[ "$CTX" == longctx32768 ]]; then + CTX_REPS=3 + fi - OUT="$RESULTDIR/${MODEL_NAME}__${ENV}${SUFFIX}${CTX_SUFFIX}.log" - CTX_REPS=5 - if [[ "$CTX" == longctx32768 ]]; then - CTX_REPS=3 - fi + if [[ -s "$OUT" ]]; then + echo "⏩ Skipping [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ ($CTX_SUFFIX)}, log already exists at $OUT" + continue + fi - if [[ -s "$OUT" ]]; then - echo "⏩ Skipping [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ ($CTX_SUFFIX)}, log already exists at $OUT" - continue - fi + FULL_CMD=( $CMD_EFFECTIVE -ngl 99 -mmp 0 -m "$MODEL_PATH" "${EXTRA_ARGS[@]}" "${CTX_ARGS[@]}" -r "$CTX_REPS" ) - FULL_CMD=( $CMD_EFFECTIVE -ngl 99 -mmp 0 -m "$MODEL_PATH" "${EXTRA_ARGS[@]}" "${CTX_ARGS[@]}" -r "$CTX_REPS" ) + printf "\n▶ [%s] %s%s%s\n" "$ENV" "$MODEL_NAME" "${SUFFIX:+ $SUFFIX}" "${CTX_SUFFIX:+ $CTX_SUFFIX}" + printf " → log: %s\n" "$OUT" + printf " → cmd: %s\n\n" "${FULL_CMD[*]}" - printf "\n▶ [%s] %s%s%s\n" "$ENV" "$MODEL_NAME" "${SUFFIX:+ $SUFFIX}" "${CTX_SUFFIX:+ $CTX_SUFFIX}" - printf " → log: %s\n" "$OUT" - printf " → cmd: %s\n\n" "${FULL_CMD[*]}" - - if ! "${FULL_CMD[@]}" >"$OUT" 2>&1; then - status=$? - echo "✖ ! [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ $CTX_SUFFIX} failed (exit ${status})" >>"$OUT" - echo " * [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ $CTX_SUFFIX} : FAILED" - fi - done + if ! "${FULL_CMD[@]}" >"$OUT" 2>&1; then + status=$? + echo "✖ ! [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ $CTX_SUFFIX} failed (exit ${status})" >>"$OUT" + echo " * [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ $CTX_SUFFIX} : FAILED" + fi done done done diff --git a/benchmark/run_rpc_benchmarks.sh b/benchmark/run_rpc_benchmarks.sh index 4a37f99..4db3a89 100755 --- a/benchmark/run_rpc_benchmarks.sh +++ b/benchmark/run_rpc_benchmarks.sh @@ -84,14 +84,6 @@ resolve_model_path() { return 1 } -get_hblt_modes() { - local env="$1" - if [[ "$env" == rocm* ]]; then - printf '%s\n' default off - else - printf '%s\n' default - fi -} ensure_models_exist() { RESOLVED_MODELS=() @@ -141,23 +133,13 @@ has_pending_runs() { start_remote_rpc() { local env="$1" local image="$2" - local mode="$3" - local suffix="$4" + local suffix="$3" local remote_log="/tmp/rpc-server-${env}${suffix}.log" - local env_prefix="" - - if [[ "$env" == rocm* ]]; then - if [[ "$mode" == off ]]; then - env_prefix="env ROCBLAS_USE_HIPBLASLT=0 " - else - env_prefix="env ROCBLAS_USE_HIPBLASLT=1 " - fi - fi ssh -p "$REMOTE_PORT" "$REMOTE_TARGET" 'bash -s' <${remote_log} 2>&1 < /dev/null & +nohup toolbox run -c ${image} -- rpc-server -H 0.0.0.0 -p ${RPC_PORT} -c >${remote_log} 2>&1 < /dev/null & echo \$! EOF } @@ -201,7 +183,6 @@ run_llama_bench_rpc() { local model_path="$1" local env="$2" local suffix="$3" - local mode="$4" local model_name model_name="$(basename "${model_path}" .gguf)" local client_cmd="${CLIENT_CMDS[$env]:-}" @@ -216,14 +197,6 @@ run_llama_bench_rpc() { return fi - if [[ "$env" == rocm* ]]; then - if [[ "$mode" == off ]]; then - client_cmd="${client_cmd/-- /-- env ROCBLAS_USE_HIPBLASLT=0 }" - else - client_cmd="${client_cmd/-- /-- env ROCBLAS_USE_HIPBLASLT=1 }" - fi - fi - local -a client_cmd_ary # shellcheck disable=SC2206 # intentional word splitting client_cmd_ary=( $client_cmd ) @@ -284,13 +257,7 @@ run_all() { continue fi - mapfile -t hblt_modes < <(get_hblt_modes "$env") - - for mode in "${hblt_modes[@]}"; do local suffix="" - if [[ "$mode" == off ]]; then - suffix="__hblt0" - fi echo echo "==== ${env}${suffix} -> ${image} ====" @@ -302,7 +269,7 @@ run_all() { CURRENT_REMOTE_ENV="${env}${suffix}" local remote_pid - remote_pid="$(start_remote_rpc "$env" "$image" "$mode" "$suffix" | tr -d '\r')" + remote_pid="$(start_remote_rpc "$env" "$image" "$suffix" | tr -d '\r')" if [[ -z "$remote_pid" ]]; then echo "[ERROR] Failed to start RPC server for ${env}${suffix}" @@ -322,13 +289,12 @@ run_all() { fi for model in "${RESOLVED_MODELS[@]}"; do - run_llama_bench_rpc "$model" "$env" "$suffix" "$mode" + run_llama_bench_rpc "$model" "$env" "$suffix" done stop_remote_rpc "$env" "$remote_pid" || true CURRENT_REMOTE_PID="" CURRENT_REMOTE_ENV="" - done done } diff --git a/docs/assets/index2.css b/docs/assets/index2.css index 0f1a98f..7f2a9af 100644 --- a/docs/assets/index2.css +++ b/docs/assets/index2.css @@ -165,10 +165,7 @@ select { transform: translateY(-2px); } -.backend-item .tag.tag-hblt0 { - background: #e9edff; - color: #1d3ea5; -} + .backend-item .tag.tag-rocwmma { background: #eef9ff; diff --git a/docs/assets/index2.js b/docs/assets/index2.js index d1a75fa..d910fd8 100644 --- a/docs/assets/index2.js +++ b/docs/assets/index2.js @@ -55,9 +55,7 @@ function cacheUI() { stats: document.getElementById("stats-line"), resetBtn: document.getElementById("reset-layout"), tables: document.getElementById("tables"), - hipblasModalOpen: document.getElementById("hipblas-modal-open"), - hipblasModal: document.getElementById("hipblas-modal"), - hipblasModalClose: document.getElementById("hipblas-modal-close"), + rpcModalOpen: document.getElementById("rpc-modal-open"), rpcModal: document.getElementById("rpc-modal"), rpcModalClose: document.getElementById("rpc-modal-close"), @@ -72,11 +70,6 @@ function cacheUI() { function setupModals() { const modalConfigs = [ - { - open: state.ui.hipblasModalOpen, - modal: state.ui.hipblasModal, - close: state.ui.hipblasModalClose, - }, { open: state.ui.rpcModalOpen, modal: state.ui.rpcModal, @@ -668,7 +661,7 @@ function backendValue(entry, direction) { function splitEnvName(env) { const canonical = env.replace(/_/g, "."); - const tagRegex = /-(rocwmma-improved|rocwmma|improved|hblt0)/gi; + const tagRegex = /-(rocwmma-improved|rocwmma|improved)/gi; const tags = []; let match; while ((match = tagRegex.exec(canonical)) !== null) { diff --git a/docs/index.html b/docs/index.html index 27145f1..ac98265 100644 --- a/docs/index.html +++ b/docs/index.html @@ -19,9 +19,7 @@
- + @@ -83,26 +81,6 @@
-