refactor: remove hblt0 benchmark support and associated comparison scripts
This commit is contained in:
@@ -1,189 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Compare hipBLASLt-on vs hblt0-off benchmark runs.
|
|
||||||
|
|
||||||
This script inspects docs/results.json (the same dataset consumed by
|
|
||||||
docs/assets/index2.js) and reports, for every backend that was benchmarked
|
|
||||||
both with and without the `-hblt0` suffix, which configuration wins.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import math
|
|
||||||
import statistics
|
|
||||||
from collections import defaultdict
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, Iterable, List, Tuple
|
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_RESULTS = Path("../docs") / "results.json"
|
|
||||||
# Matches the tolerance used in docs/assets/index2.js (MIN_TOL = 0.25)
|
|
||||||
DEFAULT_TOLERANCE = 0.25
|
|
||||||
|
|
||||||
VariantValues = Dict[str, List[float]]
|
|
||||||
BackendMatrix = Dict[Tuple, VariantValues]
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse the process arguments.

    Returns:
        A namespace with ``results`` (path to the results file) and
        ``tolerance`` (tokens/sec threshold below which a delta is a tie).
    """
    description = (
        "Pair benchmark runs with and without '-hblt0' and report which "
        "configuration is faster per backend."
    )
    cli = argparse.ArgumentParser(description=description)

    # Declared as data so every option is registered the same way.
    options = (
        (
            "--results",
            {
                "type": Path,
                "default": DEFAULT_RESULTS,
                "help": "Path to results.json generated by the benchmark pipeline.",
            },
        ),
        (
            "--tolerance",
            {
                "type": float,
                "default": DEFAULT_TOLERANCE,
                "help": "Minimum tokens/sec delta to treat as a win (default: 0.25).",
            },
        ),
    )
    for flag, settings in options:
        cli.add_argument(flag, **settings)

    return cli.parse_args()
|
|
||||||
|
|
||||||
|
|
||||||
def load_runs(path: Path) -> Iterable[dict]:
    """Load the list of benchmark runs stored in a results file.

    Args:
        path: Location of the JSON document; its top level must be an
            object holding a ``"runs"`` array.

    Returns:
        The value of the ``"runs"`` key, unchanged.

    Raises:
        ValueError: If the top level is not an object or ``"runs"`` is not
            a list.
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    data = json.loads(path.read_text(encoding="utf-8"))

    # A top-level array or scalar has no .get(); report it with the same
    # error as a missing key instead of leaking an AttributeError.
    runs = data.get("runs") if isinstance(data, dict) else None
    if not isinstance(runs, list):
        raise ValueError(f"results.json at {path} does not contain a 'runs' array")
    return runs
|
|
||||||
|
|
||||||
|
|
||||||
def measurement_key(run: dict) -> Tuple:
    """Build the tuple that identifies one benchmark scenario.

    Two runs with equal keys measured the same model, test, context, quant
    and flag combination, so they can be compared directly. Missing or
    falsy text fields collapse to fixed defaults; ``fa``, ``rpc``, ``ngl``
    and ``backend`` are passed through untouched (``None`` when absent).
    """
    model_name = run.get("model_clean") or run.get("model") or ""
    test_name = run.get("test") or ""
    context_tag = run.get("context") or "default"
    context_tokens = run.get("context_tokens") or 0
    quant_name = run.get("quant") or ""

    # Case is normalised only for the model (lower) and the quant (upper).
    identity = [
        model_name.lower(),
        test_name,
        context_tag,
        context_tokens,
        quant_name.upper(),
    ]
    identity.extend(run.get(field) for field in ("fa", "rpc", "ngl", "backend"))
    return tuple(identity)
|
|
||||||
|
|
||||||
|
|
||||||
def pair_runs(runs: Iterable[dict]) -> Tuple[Dict[str, BackendMatrix], Dict[str, Dict[str, int]]]:
    """
    Group runs by backend (without / with '-hblt0') and measurement key.

    Runs are skipped when they have no usable ``env`` string, carry a truthy
    ``error`` field, or have a ``tps_mean`` that is not a finite real number.

    Returns:
        pairs: backend -> measurement_key -> {'hipblaslt': [...], 'hblt0': [...]}
        coverage: backend -> {'hipblaslt': raw_run_count, 'hblt0': raw_run_count}
    """
    hblt0_suffix = "-hblt0"

    pairs: Dict[str, BackendMatrix] = defaultdict(lambda: defaultdict(dict))
    coverage: Dict[str, Dict[str, int]] = defaultdict(lambda: {"hipblaslt": 0, "hblt0": 0})

    for run in runs:
        env = run.get("env")
        # A truthy non-string env (e.g. a number) cannot name a backend.
        if not env or not isinstance(env, str):
            continue
        if run.get("error"):
            continue

        tps = run.get("tps_mean")
        # bool is a subclass of int, so it must be excluded explicitly;
        # infinities are rejected along with NaN because a single inf turns
        # every downstream mean and delta into inf/nan.
        if isinstance(tps, bool) or not isinstance(tps, (int, float)):
            continue
        if not math.isfinite(tps):
            continue

        is_hblt0 = env.endswith(hblt0_suffix)
        base_env = env[: -len(hblt0_suffix)] if is_hblt0 else env
        variant = "hblt0" if is_hblt0 else "hipblaslt"

        key = measurement_key(run)
        entry = pairs[base_env][key]
        entry.setdefault(variant, []).append(float(tps))
        coverage[base_env][variant] += 1

    return pairs, coverage
|
|
||||||
|
|
||||||
|
|
||||||
def summarize_backend(
    backend: str,
    matrix: BackendMatrix,
    tolerance: float,
    coverage: Dict[str, int],
) -> dict | None:
    """Compare the hipBLASLt and hblt0 variants of a single backend.

    Args:
        backend: Backend name, copied into the result.
        matrix: measurement_key -> {'hipblaslt': [tps...], 'hblt0': [tps...]}.
            Only keys that have samples for both variants are compared.
        tolerance: Tokens/sec delta that must be exceeded to count as a win.
        coverage: Raw run counts per variant, copied into the result.

    Returns:
        A dict with the number of matched cases, per-variant win counts,
        ties, average throughput per variant, absolute and percentage delta
        (hipBLASLt minus hblt0), a textual verdict and the coverage mapping;
        ``None`` when no measurement has samples for both variants.
    """
    pairs: List[Tuple[float, float]] = []

    for entry in matrix.values():
        hip_samples = entry.get("hipblaslt")
        hbl_samples = entry.get("hblt0")
        # An empty list is treated like a missing variant, since
        # statistics.mean() raises on empty input.
        if not hip_samples or not hbl_samples:
            continue
        pairs.append((statistics.mean(hip_samples), statistics.mean(hbl_samples)))

    if not pairs:
        return None

    hip_wins = sum(1 for hip, hbl in pairs if (hip - hbl) > tolerance)
    hbl_wins = sum(1 for hip, hbl in pairs if (hbl - hip) > tolerance)
    ties = len(pairs) - hip_wins - hbl_wins

    avg_hip = statistics.mean(hip for hip, _ in pairs)
    avg_hbl = statistics.mean(hbl for _, hbl in pairs)
    avg_delta = avg_hip - avg_hbl

    if avg_hbl:
        pct_delta = avg_delta / avg_hbl * 100.0
    elif avg_delta:
        # Baseline of zero: the relative change is unbounded; keep the sign.
        pct_delta = math.copysign(float("inf"), avg_delta)
    else:
        # Both averages are zero: there is no change, not an infinite one.
        pct_delta = 0.0

    if avg_delta > tolerance:
        verdict = "hipBLASLt faster"
    elif avg_delta < -tolerance:
        verdict = "hblt0 faster"
    else:
        verdict = "too close to call"

    return {
        "backend": backend,
        "pairs": len(pairs),
        "hip_wins": hip_wins,
        "hbl_wins": hbl_wins,
        "ties": ties,
        "avg_hip": avg_hip,
        "avg_hbl": avg_hbl,
        "avg_delta": avg_delta,
        "pct_delta": pct_delta,
        "verdict": verdict,
        "coverage": coverage,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def format_summary(summary: dict) -> str:
    """Render one backend summary as a single human-readable line.

    ``summary`` is expected to hold the keys read below; raw run counts
    missing from ``summary["coverage"]`` are shown as 0.
    """
    run_counts = summary["coverage"]
    hip_runs = run_counts.get("hipblaslt", 0)
    hbl_runs = run_counts.get("hblt0", 0)

    segments = [
        f"{summary['backend']}: {summary['verdict']} ",
        f"(Δ {summary['avg_delta']:+.2f} tps / {summary['pct_delta']:+.2f}% ",
        f"across {summary['pairs']} matched cases; ",
        f"hipBLASLt wins {summary['hip_wins']}, hblt0 wins {summary['hbl_wins']}, ",
        f"ties {summary['ties']}; raw runs hipBLASLt={hip_runs}, hblt0={hbl_runs})",
    ]
    return "".join(segments)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Script entry point: load the results, compare variants, print a report.

    Prints one line per backend that has at least one measurement taken both
    with and without hblt0, in sorted backend order, or a single notice when
    no such backend exists.
    """
    args = parse_args()
    matrices, coverage = pair_runs(load_runs(args.results))

    # Backends without a matched pair yield None and are filtered out.
    candidates = (
        summarize_backend(name, matrices[name], args.tolerance, coverage.get(name, {}))
        for name in sorted(matrices)
    )
    summaries = [item for item in candidates if item]

    if not summaries:
        print("No matching hipBLASLt vs hblt0 pairs were found.")
        return

    for item in summaries:
        print(format_summary(item))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -65,7 +65,7 @@ def canonicalize_env(env):
|
|||||||
|
|
||||||
def parse_env_flags(basename):
|
def parse_env_flags(basename):
|
||||||
"""
|
"""
|
||||||
pattern: <model>__<env>[__fa1][__hblt0][__longctx32768][__rpc]
|
pattern: <model>__<env>[__fa1][__longctx32768][__rpc]
|
||||||
Returns (env, fa, context_tag, context_tokens, rpc_flag)
|
Returns (env, fa, context_tag, context_tokens, rpc_flag)
|
||||||
"""
|
"""
|
||||||
parts = basename.split("__")
|
parts = basename.split("__")
|
||||||
@@ -82,8 +82,7 @@ def parse_env_flags(basename):
|
|||||||
suffix = raw_suffix.lower()
|
suffix = raw_suffix.lower()
|
||||||
if suffix == "fa1":
|
if suffix == "fa1":
|
||||||
fa = True
|
fa = True
|
||||||
elif suffix == "hblt0":
|
|
||||||
env = f"{env}-hblt0"
|
|
||||||
elif suffix.startswith("longctx"):
|
elif suffix.startswith("longctx"):
|
||||||
context_tag = suffix
|
context_tag = suffix
|
||||||
m = LONGCTX_RE.search(suffix)
|
m = LONGCTX_RE.search(suffix)
|
||||||
|
|||||||
@@ -68,41 +68,18 @@ declare -A CMDS=(
|
|||||||
[vulkan_radv]="toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench"
|
[vulkan_radv]="toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench"
|
||||||
)
|
)
|
||||||
|
|
||||||
get_hblt_modes() {
|
|
||||||
local env="$1"
|
|
||||||
if [[ "$env" == rocm* ]]; then
|
|
||||||
printf '%s\n' default off
|
|
||||||
else
|
|
||||||
printf '%s\n' default
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
for MODEL_PATH in "${MODEL_PATHS[@]}"; do
|
for MODEL_PATH in "${MODEL_PATHS[@]}"; do
|
||||||
MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"
|
MODEL_NAME="$(basename "$MODEL_PATH" .gguf)"
|
||||||
|
|
||||||
for ENV in "${!CMDS[@]}"; do
|
for ENV in "${!CMDS[@]}"; do
|
||||||
CMD="${CMDS[$ENV]}"
|
CMD_EFFECTIVE="${CMDS[$ENV]}"
|
||||||
mapfile -t HBLT_MODES < <(get_hblt_modes "$ENV")
|
|
||||||
|
|
||||||
for MODE in "${HBLT_MODES[@]}"; do
|
|
||||||
BASE_SUFFIX=""
|
|
||||||
CMD_EFFECTIVE="$CMD"
|
|
||||||
|
|
||||||
if [[ "$ENV" == rocm* ]]; then
|
|
||||||
if [[ "$MODE" == off ]]; then
|
|
||||||
BASE_SUFFIX="__hblt0"
|
|
||||||
CMD_EFFECTIVE="${CMD_EFFECTIVE/-- /-- env ROCBLAS_USE_HIPBLASLT=0 }"
|
|
||||||
else
|
|
||||||
CMD_EFFECTIVE="${CMD_EFFECTIVE/-- /-- env ROCBLAS_USE_HIPBLASLT=1 }"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# run with flash attention only (FA=1); add 0 to the list to also run the baseline
|
# run with flash attention only (FA=1); add 0 to the list to also run the baseline
|
||||||
for FA in 1; do
|
for FA in 1; do
|
||||||
SUFFIX="$BASE_SUFFIX"
|
SUFFIX=""
|
||||||
EXTRA_ARGS=()
|
EXTRA_ARGS=()
|
||||||
if (( FA == 1 )); then
|
if (( FA == 1 )); then
|
||||||
SUFFIX="${SUFFIX}__fa1"
|
SUFFIX="__fa1"
|
||||||
EXTRA_ARGS=( -fa 1 )
|
EXTRA_ARGS=( -fa 1 )
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -145,4 +122,3 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
|
|||||||
done
|
done
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
done
|
|
||||||
|
|||||||
@@ -84,14 +84,6 @@ resolve_model_path() {
|
|||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
get_hblt_modes() {
|
|
||||||
local env="$1"
|
|
||||||
if [[ "$env" == rocm* ]]; then
|
|
||||||
printf '%s\n' default off
|
|
||||||
else
|
|
||||||
printf '%s\n' default
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
ensure_models_exist() {
|
ensure_models_exist() {
|
||||||
RESOLVED_MODELS=()
|
RESOLVED_MODELS=()
|
||||||
@@ -141,23 +133,13 @@ has_pending_runs() {
|
|||||||
start_remote_rpc() {
|
start_remote_rpc() {
|
||||||
local env="$1"
|
local env="$1"
|
||||||
local image="$2"
|
local image="$2"
|
||||||
local mode="$3"
|
local suffix="$3"
|
||||||
local suffix="$4"
|
|
||||||
local remote_log="/tmp/rpc-server-${env}${suffix}.log"
|
local remote_log="/tmp/rpc-server-${env}${suffix}.log"
|
||||||
local env_prefix=""
|
|
||||||
|
|
||||||
if [[ "$env" == rocm* ]]; then
|
|
||||||
if [[ "$mode" == off ]]; then
|
|
||||||
env_prefix="env ROCBLAS_USE_HIPBLASLT=0 "
|
|
||||||
else
|
|
||||||
env_prefix="env ROCBLAS_USE_HIPBLASLT=1 "
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
ssh -p "$REMOTE_PORT" "$REMOTE_TARGET" 'bash -s' <<EOF
|
ssh -p "$REMOTE_PORT" "$REMOTE_TARGET" 'bash -s' <<EOF
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
pkill -9 -f rpc-server || true
|
pkill -9 -f rpc-server || true
|
||||||
nohup toolbox run -c ${image} -- ${env_prefix}rpc-server -H 0.0.0.0 -p ${RPC_PORT} -c >${remote_log} 2>&1 < /dev/null &
|
nohup toolbox run -c ${image} -- rpc-server -H 0.0.0.0 -p ${RPC_PORT} -c >${remote_log} 2>&1 < /dev/null &
|
||||||
echo \$!
|
echo \$!
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
@@ -201,7 +183,6 @@ run_llama_bench_rpc() {
|
|||||||
local model_path="$1"
|
local model_path="$1"
|
||||||
local env="$2"
|
local env="$2"
|
||||||
local suffix="$3"
|
local suffix="$3"
|
||||||
local mode="$4"
|
|
||||||
local model_name
|
local model_name
|
||||||
model_name="$(basename "${model_path}" .gguf)"
|
model_name="$(basename "${model_path}" .gguf)"
|
||||||
local client_cmd="${CLIENT_CMDS[$env]:-}"
|
local client_cmd="${CLIENT_CMDS[$env]:-}"
|
||||||
@@ -216,14 +197,6 @@ run_llama_bench_rpc() {
|
|||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$env" == rocm* ]]; then
|
|
||||||
if [[ "$mode" == off ]]; then
|
|
||||||
client_cmd="${client_cmd/-- /-- env ROCBLAS_USE_HIPBLASLT=0 }"
|
|
||||||
else
|
|
||||||
client_cmd="${client_cmd/-- /-- env ROCBLAS_USE_HIPBLASLT=1 }"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
local -a client_cmd_ary
|
local -a client_cmd_ary
|
||||||
# shellcheck disable=SC2206 # intentional word splitting
|
# shellcheck disable=SC2206 # intentional word splitting
|
||||||
client_cmd_ary=( $client_cmd )
|
client_cmd_ary=( $client_cmd )
|
||||||
@@ -284,13 +257,7 @@ run_all() {
|
|||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
mapfile -t hblt_modes < <(get_hblt_modes "$env")
|
|
||||||
|
|
||||||
for mode in "${hblt_modes[@]}"; do
|
|
||||||
local suffix=""
|
local suffix=""
|
||||||
if [[ "$mode" == off ]]; then
|
|
||||||
suffix="__hblt0"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo
|
echo
|
||||||
echo "==== ${env}${suffix} -> ${image} ===="
|
echo "==== ${env}${suffix} -> ${image} ===="
|
||||||
@@ -302,7 +269,7 @@ run_all() {
|
|||||||
|
|
||||||
CURRENT_REMOTE_ENV="${env}${suffix}"
|
CURRENT_REMOTE_ENV="${env}${suffix}"
|
||||||
local remote_pid
|
local remote_pid
|
||||||
remote_pid="$(start_remote_rpc "$env" "$image" "$mode" "$suffix" | tr -d '\r')"
|
remote_pid="$(start_remote_rpc "$env" "$image" "$suffix" | tr -d '\r')"
|
||||||
|
|
||||||
if [[ -z "$remote_pid" ]]; then
|
if [[ -z "$remote_pid" ]]; then
|
||||||
echo "[ERROR] Failed to start RPC server for ${env}${suffix}"
|
echo "[ERROR] Failed to start RPC server for ${env}${suffix}"
|
||||||
@@ -322,14 +289,13 @@ run_all() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
for model in "${RESOLVED_MODELS[@]}"; do
|
for model in "${RESOLVED_MODELS[@]}"; do
|
||||||
run_llama_bench_rpc "$model" "$env" "$suffix" "$mode"
|
run_llama_bench_rpc "$model" "$env" "$suffix"
|
||||||
done
|
done
|
||||||
|
|
||||||
stop_remote_rpc "$env" "$remote_pid" || true
|
stop_remote_rpc "$env" "$remote_pid" || true
|
||||||
CURRENT_REMOTE_PID=""
|
CURRENT_REMOTE_PID=""
|
||||||
CURRENT_REMOTE_ENV=""
|
CURRENT_REMOTE_ENV=""
|
||||||
done
|
done
|
||||||
done
|
|
||||||
}
|
}
|
||||||
|
|
||||||
run_all
|
run_all
|
||||||
|
|||||||
@@ -165,10 +165,7 @@ select {
|
|||||||
transform: translateY(-2px);
|
transform: translateY(-2px);
|
||||||
}
|
}
|
||||||
|
|
||||||
.backend-item .tag.tag-hblt0 {
|
|
||||||
background: #e9edff;
|
|
||||||
color: #1d3ea5;
|
|
||||||
}
|
|
||||||
|
|
||||||
.backend-item .tag.tag-rocwmma {
|
.backend-item .tag.tag-rocwmma {
|
||||||
background: #eef9ff;
|
background: #eef9ff;
|
||||||
|
|||||||
@@ -55,9 +55,7 @@ function cacheUI() {
|
|||||||
stats: document.getElementById("stats-line"),
|
stats: document.getElementById("stats-line"),
|
||||||
resetBtn: document.getElementById("reset-layout"),
|
resetBtn: document.getElementById("reset-layout"),
|
||||||
tables: document.getElementById("tables"),
|
tables: document.getElementById("tables"),
|
||||||
hipblasModalOpen: document.getElementById("hipblas-modal-open"),
|
|
||||||
hipblasModal: document.getElementById("hipblas-modal"),
|
|
||||||
hipblasModalClose: document.getElementById("hipblas-modal-close"),
|
|
||||||
rpcModalOpen: document.getElementById("rpc-modal-open"),
|
rpcModalOpen: document.getElementById("rpc-modal-open"),
|
||||||
rpcModal: document.getElementById("rpc-modal"),
|
rpcModal: document.getElementById("rpc-modal"),
|
||||||
rpcModalClose: document.getElementById("rpc-modal-close"),
|
rpcModalClose: document.getElementById("rpc-modal-close"),
|
||||||
@@ -72,11 +70,6 @@ function cacheUI() {
|
|||||||
|
|
||||||
function setupModals() {
|
function setupModals() {
|
||||||
const modalConfigs = [
|
const modalConfigs = [
|
||||||
{
|
|
||||||
open: state.ui.hipblasModalOpen,
|
|
||||||
modal: state.ui.hipblasModal,
|
|
||||||
close: state.ui.hipblasModalClose,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
open: state.ui.rpcModalOpen,
|
open: state.ui.rpcModalOpen,
|
||||||
modal: state.ui.rpcModal,
|
modal: state.ui.rpcModal,
|
||||||
@@ -668,7 +661,7 @@ function backendValue(entry, direction) {
|
|||||||
|
|
||||||
function splitEnvName(env) {
|
function splitEnvName(env) {
|
||||||
const canonical = env.replace(/_/g, ".");
|
const canonical = env.replace(/_/g, ".");
|
||||||
const tagRegex = /-(rocwmma-improved|rocwmma|improved|hblt0)/gi;
|
const tagRegex = /-(rocwmma-improved|rocwmma|improved)/gi;
|
||||||
const tags = [];
|
const tags = [];
|
||||||
let match;
|
let match;
|
||||||
while ((match = tagRegex.exec(canonical)) !== null) {
|
while ((match = tagRegex.exec(canonical)) !== null) {
|
||||||
|
|||||||
+1
-23
@@ -19,9 +19,7 @@
|
|||||||
<div class="legend">
|
<div class="legend">
|
||||||
<label>Legend</label>
|
<label>Legend</label>
|
||||||
<div class="legend-pills">
|
<div class="legend-pills">
|
||||||
<button id="hipblas-modal-open" type="button" class="chip small legend-pill legend-pill-default">
|
|
||||||
hipBLASLt vs hblt0
|
|
||||||
</button>
|
|
||||||
<button id="rpc-modal-open" type="button" class="chip small legend-pill legend-pill-rpc">
|
<button id="rpc-modal-open" type="button" class="chip small legend-pill legend-pill-rpc">
|
||||||
RPC · dual server
|
RPC · dual server
|
||||||
</button>
|
</button>
|
||||||
@@ -83,26 +81,6 @@
|
|||||||
<div id="tables"></div>
|
<div id="tables"></div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<div id="hipblas-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="hipblas-title">
|
|
||||||
<div class="modal-content">
|
|
||||||
<button id="hipblas-modal-close" class="modal-close" aria-label="Close dialog">×</button>
|
|
||||||
<h2 id="hipblas-title">hipBLASLt & hblt0 explained</h2>
|
|
||||||
<p>The ROCm toolboxes ship with <code>ROCBLAS_USE_HIPBLASLT=1</code> by default. This forces rocBLAS to
|
|
||||||
prefer
|
|
||||||
the hipBLASLt kernel library, which historically delivered the best throughput on gfx1151 (Strix Halo).
|
|
||||||
</p>
|
|
||||||
<p>Rows tagged with <code>__hblt0</code> were re-run with <code>ROCBLAS_USE_HIPBLASLT=0</code>, letting
|
|
||||||
rocBLAS
|
|
||||||
auto-select between hipBLASLt, Tensile, or other kernel providers. These runs show how performance
|
|
||||||
shifts when
|
|
||||||
the tuned hipBLASLt path is disabled.</p>
|
|
||||||
<p>hipBLASLt is AMD's lightweight ("Lt") matmul backend, optimized for transformer workloads. Disabling it
|
|
||||||
can
|
|
||||||
expose regressions or improvements depending on driver versions, so both configurations are published
|
|
||||||
for
|
|
||||||
comparison.</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div id="rpc-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rpc-title">
|
<div id="rpc-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rpc-title">
|
||||||
<div class="modal-content">
|
<div class="modal-content">
|
||||||
|
|||||||
Reference in New Issue
Block a user