refactor: remove hblt0 benchmark support and associated comparison scripts

This commit is contained in:
Donato Capitella
2026-04-10 11:23:06 +01:00
parent 5acf54cd67
commit c129a04a1c
7 changed files with 47 additions and 327 deletions
-189
View File
@@ -1,189 +0,0 @@
#!/usr/bin/env python3
"""
Compare benchmark runs with hipBLASLt enabled against runs with it disabled (`-hblt0`).
This script inspects docs/results.json (the same dataset consumed by
docs/assets/index2.js) and reports, for every backend that was benchmarked
both with and without the `-hblt0` suffix, which configuration wins.
"""
from __future__ import annotations
import argparse
import json
import math
import statistics
from collections import defaultdict
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
# Default dataset location; a relative path, so it is resolved against the
# current working directory when the file is read.
DEFAULT_RESULTS = Path("..", "docs", "results.json")

# Matches the tolerance used in docs/assets/index2.js (MIN_TOL = 0.25)
DEFAULT_TOLERANCE = 0.25

# Variant name ('hipblaslt' or 'hblt0') -> tokens/sec samples for that variant.
VariantValues = Dict[str, List[float]]
# Measurement key tuple -> the per-variant samples recorded for that scenario.
BackendMatrix = Dict[Tuple, VariantValues]
def parse_args(argv: List[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the comparison script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        Namespace with ``results`` (a ``Path``) and ``tolerance`` (a float).
    """
    parser = argparse.ArgumentParser(
        description=(
            "Pair benchmark runs with and without '-hblt0' and report which "
            "configuration is faster per backend."
        )
    )
    parser.add_argument(
        "--results",
        type=Path,
        default=DEFAULT_RESULTS,
        help="Path to results.json generated by the benchmark pipeline.",
    )
    parser.add_argument(
        "--tolerance",
        type=float,
        default=DEFAULT_TOLERANCE,
        # %(default)s is filled in by argparse, so the advertised value can
        # never drift from DEFAULT_TOLERANCE.
        help="Minimum tokens/sec delta to treat as a win (default: %(default)s).",
    )
    return parser.parse_args(argv)
def load_runs(path: Path) -> Iterable[dict]:
    """Load the list of benchmark runs from a results.json document.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The list stored under the top-level ``"runs"`` key.

    Raises:
        ValueError: If the document is not a JSON object, or its ``"runs"``
            entry is missing or not a list.
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of depending on the locale encoding.
    data = json.loads(path.read_text(encoding="utf-8"))
    # A non-object top level (for example a bare array) has no .get(); report
    # it with the same error as a missing or malformed 'runs' entry.
    runs = data.get("runs") if isinstance(data, dict) else None
    if not isinstance(runs, list):
        raise ValueError(f"results.json at {path} does not contain a 'runs' array")
    return runs
def measurement_key(run: dict) -> Tuple:
    """Build the hashable tuple that identifies the scenario behind *run*.

    The tuple holds, in order: lower-cased model name (``model_clean`` is
    preferred over ``model``), test, context tag (``"default"`` when unset),
    context token count (``0`` when unset), upper-cased quantisation, and the
    raw ``fa``, ``rpc``, ``ngl`` and ``backend`` values.
    """
    model_name = run.get("model_clean") or run.get("model") or ""
    quantisation = run.get("quant") or ""
    scenario = [
        model_name.lower(),
        run.get("test") or "",
        run.get("context") or "default",
        run.get("context_tokens") or 0,
        quantisation.upper(),
    ]
    # The remaining fields are copied verbatim (None when absent).
    scenario.extend(run.get(field) for field in ("fa", "rpc", "ngl", "backend"))
    return tuple(scenario)
def pair_runs(runs: Iterable[dict]) -> Tuple[Dict[str, BackendMatrix], Dict[str, Dict[str, int]]]:
    """
    Group runs by backend (without / with '-hblt0') and measurement key.

    Runs are skipped when they have no string ``env``, carry a truthy
    ``error``, or have a ``tps_mean`` that is not a finite real number.

    Returns:
        pairs: backend -> measurement_key -> {'hipblaslt': [...], 'hblt0': [...]}
        coverage: backend -> {'hipblaslt': raw_run_count, 'hblt0': raw_run_count}
    """
    hblt0_suffix = "-hblt0"
    pairs: Dict[str, BackendMatrix] = defaultdict(lambda: defaultdict(dict))
    coverage: Dict[str, Dict[str, int]] = defaultdict(lambda: {"hipblaslt": 0, "hblt0": 0})
    for run in runs:
        env = run.get("env")
        # A non-string env cannot carry the suffix and would crash .endswith().
        if not env or not isinstance(env, str):
            continue
        if run.get("error"):
            continue
        tps = run.get("tps_mean")
        # bool is a subclass of int; True/False are not throughput figures.
        if isinstance(tps, bool) or not isinstance(tps, (int, float)):
            continue
        # Reject NaN and +/-inf: either would poison the averages downstream.
        if not math.isfinite(tps):
            continue
        is_hblt0 = env.endswith(hblt0_suffix)
        base_env = env[: -len(hblt0_suffix)] if is_hblt0 else env
        variant = "hblt0" if is_hblt0 else "hipblaslt"
        key = measurement_key(run)
        pairs[base_env][key].setdefault(variant, []).append(float(tps))
        coverage[base_env][variant] += 1
    return pairs, coverage
def summarize_backend(
    backend: str,
    matrix: BackendMatrix,
    tolerance: float,
    coverage: Dict[str, int],
) -> dict | None:
    """Summarise hipBLASLt vs hblt0 results for one backend.

    Only measurement keys that have at least one sample for both variants are
    compared; each variant's samples are averaged per key first.

    Args:
        backend: Backend name, copied into the summary.
        matrix: measurement_key -> {'hipblaslt': [...], 'hblt0': [...]}.
        tolerance: Minimum tokens/sec difference that counts as a win.
        coverage: Raw run counts for this backend, copied into the summary.

    Returns:
        A dict with the keys ``backend``, ``pairs``, ``hip_wins``,
        ``hbl_wins``, ``ties``, ``avg_hip``, ``avg_hbl``, ``avg_delta``,
        ``pct_delta``, ``verdict`` and ``coverage``; or ``None`` when no
        measurement key has samples for both variants.
    """
    # .get() truthiness also skips an empty sample list, which would otherwise
    # make statistics.mean() raise StatisticsError.
    pairs: List[Tuple[float, float]] = [
        (statistics.mean(entry["hipblaslt"]), statistics.mean(entry["hblt0"]))
        for entry in matrix.values()
        if entry.get("hipblaslt") and entry.get("hblt0")
    ]
    if not pairs:
        return None

    hip_wins = sum(1 for hip, hbl in pairs if (hip - hbl) > tolerance)
    hbl_wins = sum(1 for hip, hbl in pairs if (hbl - hip) > tolerance)
    ties = len(pairs) - hip_wins - hbl_wins

    avg_hip = statistics.mean(hip for hip, _ in pairs)
    avg_hbl = statistics.mean(hbl for _, hbl in pairs)
    avg_delta = avg_hip - avg_hbl

    if avg_hbl:
        pct_delta = avg_delta / avg_hbl * 100.0
    elif avg_delta:
        # Zero baseline: the relative change is unbounded, in the delta's direction.
        pct_delta = math.copysign(float("inf"), avg_delta)
    else:
        # Both averages are zero: there is no change to report.
        pct_delta = 0.0

    if avg_delta > tolerance:
        verdict = "hipBLASLt faster"
    elif avg_delta < -tolerance:
        verdict = "hblt0 faster"
    else:
        verdict = "too close to call"

    return {
        "backend": backend,
        "pairs": len(pairs),
        "hip_wins": hip_wins,
        "hbl_wins": hbl_wins,
        "ties": ties,
        "avg_hip": avg_hip,
        "avg_hbl": avg_hbl,
        "avg_delta": avg_delta,
        "pct_delta": pct_delta,
        "verdict": verdict,
        "coverage": coverage,
    }
def format_summary(summary: dict) -> str:
    """Render one backend summary as a single human-readable line.

    Args:
        summary: Mapping with the keys ``backend``, ``verdict``,
            ``avg_delta``, ``pct_delta``, ``pairs``, ``hip_wins``,
            ``hbl_wins``, ``ties`` and ``coverage``.

    Returns:
        ``"<backend>: <verdict> (<details>)"``; raw run counts missing from
        ``coverage`` are shown as 0.
    """
    cov = summary["coverage"]
    hip_runs = cov.get("hipblaslt", 0)
    hbl_runs = cov.get("hblt0", 0)
    return (
        f"{summary['backend']}: {summary['verdict']} "
        # The opening parenthesis pairs with the ')' that ends the line.
        f"({summary['avg_delta']:+.2f} tps / {summary['pct_delta']:+.2f}% "
        f"across {summary['pairs']} matched cases; "
        f"hipBLASLt wins {summary['hip_wins']}, hblt0 wins {summary['hbl_wins']}, "
        f"ties {summary['ties']}; raw runs hipBLASLt={hip_runs}, hblt0={hbl_runs})"
    )
def main() -> None:
    """Load the results, summarise every backend and print one line each.

    Prints a single notice instead when no backend has matching
    hipBLASLt / hblt0 measurements.
    """
    options = parse_args()
    matrices, coverage = pair_runs(load_runs(options.results))

    candidates = (
        summarize_backend(name, matrices[name], options.tolerance, coverage.get(name, {}))
        for name in sorted(matrices)
    )
    # Materialise first so nothing is printed until every backend is summarised.
    reports = [report for report in candidates if report]

    if reports:
        for report in reports:
            print(format_summary(report))
    else:
        print("No matching hipBLASLt vs hblt0 pairs were found.")


if __name__ == "__main__":
    main()
+2 -3
View File
@@ -65,7 +65,7 @@ def canonicalize_env(env):
def parse_env_flags(basename):
"""
pattern: <model>__<env>[__fa1][__hblt0][__longctx32768][__rpc]
pattern: <model>__<env>[__fa1][__longctx32768][__rpc]
Returns (env, fa, context_tag, context_tokens, rpc_flag)
"""
parts = basename.split("__")
@@ -82,8 +82,7 @@ def parse_env_flags(basename):
suffix = raw_suffix.lower()
if suffix == "fa1":
fa = True
elif suffix == "hblt0":
env = f"{env}-hblt0"
elif suffix.startswith("longctx"):
context_tag = suffix
m = LONGCTX_RE.search(suffix)
+37 -61
View File
@@ -68,80 +68,56 @@ declare -A CMDS=(
[vulkan_radv]="toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench"
)
# Print the hipBLASLt modes to benchmark for an environment, one per line.
#   $1  environment name; names starting with "rocm" yield "default" and
#       "off", every other name yields only "default".
get_hblt_modes() {
  local target="$1"
  case "$target" in
    rocm*) printf '%s\n' default off ;;
    *)     printf '%s\n' default ;;
  esac
}
for MODEL_PATH in "${MODEL_PATHS[@]}"; do
MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"
for ENV in "${!CMDS[@]}"; do
CMD="${CMDS[$ENV]}"
mapfile -t HBLT_MODES < <(get_hblt_modes "$ENV")
CMD_EFFECTIVE="${CMDS[$ENV]}"
for MODE in "${HBLT_MODES[@]}"; do
BASE_SUFFIX=""
CMD_EFFECTIVE="$CMD"
if [[ "$ENV" == rocm* ]]; then
if [[ "$MODE" == off ]]; then
BASE_SUFFIX="__hblt0"
CMD_EFFECTIVE="${CMD_EFFECTIVE/-- /-- env ROCBLAS_USE_HIPBLASLT=0 }"
else
CMD_EFFECTIVE="${CMD_EFFECTIVE/-- /-- env ROCBLAS_USE_HIPBLASLT=1 }"
fi
# run twice: baseline and with flash attention
for FA in 1; do
SUFFIX=""
EXTRA_ARGS=()
if (( FA == 1 )); then
SUFFIX="__fa1"
EXTRA_ARGS=( -fa 1 )
fi
# run twice: baseline and with flash attention
for FA in 1; do
SUFFIX="$BASE_SUFFIX"
EXTRA_ARGS=()
if (( FA == 1 )); then
SUFFIX="${SUFFIX}__fa1"
EXTRA_ARGS=( -fa 1 )
for CTX in default longctx32768; do
CTX_SUFFIX=""
CTX_ARGS=()
if [[ "$CTX" == longctx32768 ]]; then
CTX_SUFFIX="__longctx32768"
CTX_ARGS=( -p 2048 -n 32 -d 32768 )
if [[ "$ENV" == *vulkan* ]]; then
CTX_ARGS+=( -ub 512 )
else
CTX_ARGS+=( -ub 2048 )
fi
fi
for CTX in default longctx32768; do
CTX_SUFFIX=""
CTX_ARGS=()
if [[ "$CTX" == longctx32768 ]]; then
CTX_SUFFIX="__longctx32768"
CTX_ARGS=( -p 2048 -n 32 -d 32768 )
if [[ "$ENV" == *vulkan* ]]; then
CTX_ARGS+=( -ub 512 )
else
CTX_ARGS+=( -ub 2048 )
fi
fi
OUT="$RESULTDIR/${MODEL_NAME}__${ENV}${SUFFIX}${CTX_SUFFIX}.log"
CTX_REPS=5
if [[ "$CTX" == longctx32768 ]]; then
CTX_REPS=3
fi
OUT="$RESULTDIR/${MODEL_NAME}__${ENV}${SUFFIX}${CTX_SUFFIX}.log"
CTX_REPS=5
if [[ "$CTX" == longctx32768 ]]; then
CTX_REPS=3
fi
if [[ -s "$OUT" ]]; then
echo "⏩ Skipping [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ ($CTX_SUFFIX)}, log already exists at $OUT"
continue
fi
if [[ -s "$OUT" ]]; then
echo "⏩ Skipping [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ ($CTX_SUFFIX)}, log already exists at $OUT"
continue
fi
FULL_CMD=( $CMD_EFFECTIVE -ngl 99 -mmp 0 -m "$MODEL_PATH" "${EXTRA_ARGS[@]}" "${CTX_ARGS[@]}" -r "$CTX_REPS" )
FULL_CMD=( $CMD_EFFECTIVE -ngl 99 -mmp 0 -m "$MODEL_PATH" "${EXTRA_ARGS[@]}" "${CTX_ARGS[@]}" -r "$CTX_REPS" )
printf "\n▶ [%s] %s%s%s\n" "$ENV" "$MODEL_NAME" "${SUFFIX:+ $SUFFIX}" "${CTX_SUFFIX:+ $CTX_SUFFIX}"
printf " → log: %s\n" "$OUT"
printf " → cmd: %s\n\n" "${FULL_CMD[*]}"
printf "\n▶ [%s] %s%s%s\n" "$ENV" "$MODEL_NAME" "${SUFFIX:+ $SUFFIX}" "${CTX_SUFFIX:+ $CTX_SUFFIX}"
printf " → log: %s\n" "$OUT"
printf " → cmd: %s\n\n" "${FULL_CMD[*]}"
if ! "${FULL_CMD[@]}" >"$OUT" 2>&1; then
status=$?
echo "✖ ! [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ $CTX_SUFFIX} failed (exit ${status})" >>"$OUT"
echo " * [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ $CTX_SUFFIX} : FAILED"
fi
done
if ! "${FULL_CMD[@]}" >"$OUT" 2>&1; then
status=$?
echo "✖ ! [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ $CTX_SUFFIX} failed (exit ${status})" >>"$OUT"
echo " * [${ENV}] ${MODEL_NAME}${SUFFIX}${CTX_SUFFIX:+ $CTX_SUFFIX} : FAILED"
fi
done
done
done
+4 -38
View File
@@ -84,14 +84,6 @@ resolve_model_path() {
return 1
}
# Print the hipBLASLt modes to run for an environment, one per line.
#   $1  environment name; "rocm*" environments get "default" and "off",
#       all others get only "default".
get_hblt_modes() {
  local target="$1"
  case "$target" in
    rocm*) printf '%s\n' default off ;;
    *)     printf '%s\n' default ;;
  esac
}
ensure_models_exist() {
RESOLVED_MODELS=()
@@ -141,23 +133,13 @@ has_pending_runs() {
start_remote_rpc() {
local env="$1"
local image="$2"
local mode="$3"
local suffix="$4"
local suffix="$3"
local remote_log="/tmp/rpc-server-${env}${suffix}.log"
local env_prefix=""
if [[ "$env" == rocm* ]]; then
if [[ "$mode" == off ]]; then
env_prefix="env ROCBLAS_USE_HIPBLASLT=0 "
else
env_prefix="env ROCBLAS_USE_HIPBLASLT=1 "
fi
fi
ssh -p "$REMOTE_PORT" "$REMOTE_TARGET" 'bash -s' <<EOF
set -euo pipefail
pkill -9 -f rpc-server || true
nohup toolbox run -c ${image} -- ${env_prefix}rpc-server -H 0.0.0.0 -p ${RPC_PORT} -c >${remote_log} 2>&1 < /dev/null &
nohup toolbox run -c ${image} -- rpc-server -H 0.0.0.0 -p ${RPC_PORT} -c >${remote_log} 2>&1 < /dev/null &
echo \$!
EOF
}
@@ -201,7 +183,6 @@ run_llama_bench_rpc() {
local model_path="$1"
local env="$2"
local suffix="$3"
local mode="$4"
local model_name
model_name="$(basename "${model_path}" .gguf)"
local client_cmd="${CLIENT_CMDS[$env]:-}"
@@ -216,14 +197,6 @@ run_llama_bench_rpc() {
return
fi
if [[ "$env" == rocm* ]]; then
if [[ "$mode" == off ]]; then
client_cmd="${client_cmd/-- /-- env ROCBLAS_USE_HIPBLASLT=0 }"
else
client_cmd="${client_cmd/-- /-- env ROCBLAS_USE_HIPBLASLT=1 }"
fi
fi
local -a client_cmd_ary
# shellcheck disable=SC2206 # intentional word splitting
client_cmd_ary=( $client_cmd )
@@ -284,13 +257,7 @@ run_all() {
continue
fi
mapfile -t hblt_modes < <(get_hblt_modes "$env")
for mode in "${hblt_modes[@]}"; do
local suffix=""
if [[ "$mode" == off ]]; then
suffix="__hblt0"
fi
echo
echo "==== ${env}${suffix} -> ${image} ===="
@@ -302,7 +269,7 @@ run_all() {
CURRENT_REMOTE_ENV="${env}${suffix}"
local remote_pid
remote_pid="$(start_remote_rpc "$env" "$image" "$mode" "$suffix" | tr -d '\r')"
remote_pid="$(start_remote_rpc "$env" "$image" "$suffix" | tr -d '\r')"
if [[ -z "$remote_pid" ]]; then
echo "[ERROR] Failed to start RPC server for ${env}${suffix}"
@@ -322,13 +289,12 @@ run_all() {
fi
for model in "${RESOLVED_MODELS[@]}"; do
run_llama_bench_rpc "$model" "$env" "$suffix" "$mode"
run_llama_bench_rpc "$model" "$env" "$suffix"
done
stop_remote_rpc "$env" "$remote_pid" || true
CURRENT_REMOTE_PID=""
CURRENT_REMOTE_ENV=""
done
done
}