updated benchmarks
This commit is contained in:
+164
-1
@@ -149,6 +149,7 @@ select {
|
||||
gap: 6px;
|
||||
font-size: 12px;
|
||||
color: var(--ink);
|
||||
text-transform: none;
|
||||
}
|
||||
|
||||
.backend-item input {
|
||||
@@ -161,10 +162,29 @@ select {
|
||||
border-radius: 999px;
|
||||
background: #eef2ff;
|
||||
color: #1d3ea5;
|
||||
text-transform: uppercase;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
.backend-item .tag.tag-hblt0 {
|
||||
background: #e9edff;
|
||||
color: #1d3ea5;
|
||||
}
|
||||
|
||||
.backend-item .tag.tag-rocwmma {
|
||||
background: #eef9ff;
|
||||
color: #0a517a;
|
||||
}
|
||||
|
||||
.backend-item .tag.tag-rocwmma-improved {
|
||||
background: #faf3ff;
|
||||
color: #6b1fb7;
|
||||
}
|
||||
|
||||
.backend-item .tag.tag-improved {
|
||||
background: #fef9e7;
|
||||
color: #8a5a00;
|
||||
}
|
||||
|
||||
.stats-box {
|
||||
margin-left: auto;
|
||||
display: flex;
|
||||
@@ -269,6 +289,142 @@ td.model {
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
td.model .model-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
flex-wrap: wrap;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.model-pill {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
padding: 2px 8px;
|
||||
border-radius: 999px;
|
||||
font-size: 10px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
background: #eceff5;
|
||||
color: #27303f;
|
||||
border: 1px solid transparent;
|
||||
}
|
||||
|
||||
.model-pill-rpc {
|
||||
background: #fdf2f8;
|
||||
border-color: #fbcfe8;
|
||||
color: #9d174d;
|
||||
}
|
||||
|
||||
.model-pill-rocwmma {
|
||||
background: #eef9ff;
|
||||
border-color: #c7e9ff;
|
||||
color: #0a517a;
|
||||
}
|
||||
|
||||
.legend {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 6px;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.legend label {
|
||||
font-size: 10px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.06em;
|
||||
color: var(--muted);
|
||||
}
|
||||
|
||||
.legend-pills {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.legend-pill {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 4px;
|
||||
border-radius: 999px;
|
||||
border: 1px solid transparent;
|
||||
background: #e9edff;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
.legend-pill-default {
|
||||
background: #e9edff;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
.legend-pill-rpc {
|
||||
background: #fdf2f8;
|
||||
border-color: #fbcfe8;
|
||||
color: #9d174d;
|
||||
}
|
||||
|
||||
.legend-pill-rocwmma {
|
||||
background: #eef9ff;
|
||||
border-color: #c7e9ff;
|
||||
color: #0a517a;
|
||||
}
|
||||
|
||||
.legend-pill-rocwmma-improved {
|
||||
background: #faf3ff;
|
||||
border-color: #e0c8ff;
|
||||
color: #6b1fb7;
|
||||
}
|
||||
|
||||
.modal.hidden {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.modal {
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
background: rgba(0, 0, 0, 0.5);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 20px;
|
||||
z-index: 1000;
|
||||
}
|
||||
|
||||
.modal-content {
|
||||
background: #fff;
|
||||
border-radius: 12px;
|
||||
padding: 20px 24px;
|
||||
max-width: 520px;
|
||||
width: 100%;
|
||||
box-shadow: 0 12px 50px rgba(0, 0, 0, 0.2);
|
||||
position: relative;
|
||||
font-size: 13px;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.modal-content h2 {
|
||||
margin-top: 0;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
.modal-content p {
|
||||
margin: 8px 0;
|
||||
}
|
||||
|
||||
.modal-close {
|
||||
position: absolute;
|
||||
top: 8px;
|
||||
right: 10px;
|
||||
border: none;
|
||||
background: transparent;
|
||||
font-size: 20px;
|
||||
cursor: pointer;
|
||||
color: var(--muted);
|
||||
}
|
||||
|
||||
.modal-close:hover {
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
.data-cell {
|
||||
white-space: normal;
|
||||
position: relative;
|
||||
@@ -501,3 +657,10 @@ th.backend-header.drop-target {
|
||||
color: var(--muted);
|
||||
margin-top: 4px;
|
||||
}
|
||||
.modal-content code {
|
||||
font-family: "JetBrains Mono", "SFMono-Regular", Consolas, monospace;
|
||||
background: #f6f8fc;
|
||||
padding: 1px 4px;
|
||||
border-radius: 4px;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
+89
-9
@@ -25,6 +25,7 @@ const state = {
|
||||
|
||||
document.addEventListener("DOMContentLoaded", async () => {
|
||||
cacheUI();
|
||||
setupModals();
|
||||
try {
|
||||
const res = await fetch("results.json");
|
||||
const data = await res.json();
|
||||
@@ -53,9 +54,62 @@ function cacheUI() {
|
||||
stats: document.getElementById("stats-line"),
|
||||
resetBtn: document.getElementById("reset-layout"),
|
||||
tables: document.getElementById("tables"),
|
||||
hipblasModalOpen: document.getElementById("hipblas-modal-open"),
|
||||
hipblasModal: document.getElementById("hipblas-modal"),
|
||||
hipblasModalClose: document.getElementById("hipblas-modal-close"),
|
||||
rpcModalOpen: document.getElementById("rpc-modal-open"),
|
||||
rpcModal: document.getElementById("rpc-modal"),
|
||||
rpcModalClose: document.getElementById("rpc-modal-close"),
|
||||
rocwmmaModalOpen: document.getElementById("rocwmma-modal-open"),
|
||||
rocwmmaModal: document.getElementById("rocwmma-modal"),
|
||||
rocwmmaModalClose: document.getElementById("rocwmma-modal-close"),
|
||||
rocwmmaImprModalOpen: document.getElementById("rocwmma-impr-modal-open"),
|
||||
rocwmmaImprModal: document.getElementById("rocwmma-impr-modal"),
|
||||
rocwmmaImprModalClose: document.getElementById("rocwmma-impr-modal-close"),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Wires up the explanatory dialogs opened from the legend buttons.
 *
 * Element references are read from `state.ui`. For every dialog that has both a
 * trigger and a container element:
 *   - clicking the trigger removes the "hidden" class from the container;
 *   - clicking the close button (when one exists) adds "hidden" back;
 *   - clicking the container itself (the backdrop) adds "hidden" back;
 *   - pressing Escape adds "hidden" back.
 * Dialogs whose trigger or container reference is missing are skipped.
 */
function setupModals() {
  // Prefixes of the `<prefix>ModalOpen` / `<prefix>Modal` / `<prefix>ModalClose`
  // keys looked up on state.ui.
  const prefixes = ["hipblas", "rpc", "rocwmma", "rocwmmaImpr"];
  const wiredModals = [];

  prefixes.forEach((prefix) => {
    const open = state.ui[`${prefix}ModalOpen`];
    const modal = state.ui[`${prefix}Modal`];
    const close = state.ui[`${prefix}ModalClose`];
    if (!open || !modal) return;

    const openModal = () => modal.classList.remove("hidden");
    const closeModal = () => modal.classList.add("hidden");

    open.addEventListener("click", openModal);
    close?.addEventListener("click", closeModal);
    // Dismiss only when the click landed on the container itself; clicks that
    // bubble up from elements inside it have a different target.
    modal.addEventListener("click", (e) => {
      if (e.target === modal) closeModal();
    });

    wiredModals.push(modal);
  });

  if (wiredModals.length === 0) return;

  // One shared key handler for all dialogs. Adding "hidden" to a dialog that
  // is already hidden is a no-op, so no visibility check is needed.
  document.addEventListener("keydown", (e) => {
    if (e.key !== "Escape") return;
    wiredModals.forEach((modal) => modal.classList.add("hidden"));
  });
}
|
||||
|
||||
function prepareData(runs) {
|
||||
const contextMap = new Map();
|
||||
const envSet = new Set();
|
||||
@@ -132,6 +186,7 @@ function ensureModel(testEntry, modelName, run) {
|
||||
quant: (run.quant || "Unknown").toUpperCase(),
|
||||
sizeB: run.name_params_b ?? run.params_b ?? null,
|
||||
backends: {},
|
||||
isRpc: Boolean(run.rpc),
|
||||
search_blob: [modelName, run.quant, run.env, run.test]
|
||||
.filter(Boolean)
|
||||
.map((s) => s.toString().toLowerCase())
|
||||
@@ -147,6 +202,12 @@ function ensureModel(testEntry, modelName, run) {
|
||||
state.sizeStats.min = Math.min(state.sizeStats.min, row.sizeB);
|
||||
state.sizeStats.max = Math.max(state.sizeStats.max, row.sizeB);
|
||||
}
|
||||
if (run.rpc) {
|
||||
row.isRpc = true;
|
||||
if (!row.search_blob.includes("rpc")) {
|
||||
row.search_blob = `${row.search_blob} rpc`;
|
||||
}
|
||||
}
|
||||
return row;
|
||||
}
|
||||
|
||||
@@ -259,6 +320,8 @@ function renderBackendList() {
|
||||
const pill = document.createElement("span");
|
||||
pill.className = "tag";
|
||||
pill.textContent = tag;
|
||||
const safeTag = tag.replace(/[^a-z0-9]+/gi, "-").toLowerCase();
|
||||
pill.classList.add(`tag-${safeTag}`);
|
||||
label.appendChild(pill);
|
||||
});
|
||||
|
||||
@@ -393,7 +456,24 @@ function buildSingleTable(models, backendList) {
|
||||
const tr = document.createElement("tr");
|
||||
const tdModel = document.createElement("td");
|
||||
tdModel.className = "model";
|
||||
tdModel.innerHTML = `<div>${model.model}</div><div class="meta">${model.quant} · ${formatSize(model.sizeB)}</div>`;
|
||||
const head = document.createElement("div");
|
||||
head.className = "model-head";
|
||||
const nameSpan = document.createElement("span");
|
||||
nameSpan.className = "model-name";
|
||||
nameSpan.textContent = model.model;
|
||||
head.appendChild(nameSpan);
|
||||
if (model.isRpc) {
|
||||
const pill = document.createElement("span");
|
||||
pill.className = "model-pill model-pill-rpc";
|
||||
pill.title = "Run executed via llama.cpp RPC across two servers";
|
||||
pill.textContent = "RPC · dual server";
|
||||
head.appendChild(pill);
|
||||
}
|
||||
tdModel.appendChild(head);
|
||||
const meta = document.createElement("div");
|
||||
meta.className = "meta";
|
||||
meta.textContent = `${model.quant} · ${formatSize(model.sizeB)}`;
|
||||
tdModel.appendChild(meta);
|
||||
|
||||
const actionWrap = document.createElement("div");
|
||||
actionWrap.className = "row-actions";
|
||||
@@ -586,14 +666,14 @@ function backendValue(entry, direction) {
|
||||
}
|
||||
|
||||
/**
 * Splits an environment name into a display base name and its variant tags.
 *
 * Underscores are first replaced with dots (e.g. "7_1" becomes "7.1"). Every
 * "-rocwmma-improved", "-rocwmma", "-improved" or "-hblt0" segment, matched
 * case-insensitively, is then collected as a lower-cased tag and removed from
 * the base name.
 *
 * @param {string} env - Raw environment name; null/undefined is treated as "".
 * @returns {{ base: string, tags: string[] }} The name with all tag segments
 *   stripped, and the tags in the order they appear in the name.
 */
function splitEnvName(env) {
  const canonical = String(env ?? "").replace(/_/g, ".");
  // "rocwmma-improved" is listed first so the combined tag is preferred over
  // matching "rocwmma" and "improved" separately.
  const tagRegex = /-(rocwmma-improved|rocwmma|improved|hblt0)/gi;
  // matchAll works on a copy of the regex, so no lastIndex state leaks into
  // the replace() call below.
  const tags = Array.from(canonical.matchAll(tagRegex), (match) =>
    match[1].toLowerCase(),
  );
  const base = canonical.replace(tagRegex, "");
  return { base, tags };
}
|
||||
|
||||
|
||||
@@ -15,6 +15,23 @@
|
||||
<p>Fedora 42 · Linux 6.18.0-0.rc5.243.vanilla.fc42.x86_64 · llama.cpp build 1c398dc9e (7034)</p>
|
||||
<p>Benchmarks captured 14 Nov 2025 · Repo: <a href="https://github.com/kyuz0/amd-strix-halo-toolboxes"
|
||||
target="_blank" rel="noreferrer">kyuz0/amd-strix-halo-toolboxes</a></p>
|
||||
<div class="legend">
|
||||
<label>Legend</label>
|
||||
<div class="legend-pills">
|
||||
<button id="hipblas-modal-open" type="button" class="chip small legend-pill legend-pill-default">
|
||||
hipBLASLt vs hblt0
|
||||
</button>
|
||||
<button id="rpc-modal-open" type="button" class="chip small legend-pill legend-pill-rpc">
|
||||
RPC · dual server
|
||||
</button>
|
||||
<button id="rocwmma-modal-open" type="button" class="chip small legend-pill legend-pill-rocwmma">
|
||||
rocWMMA
|
||||
</button>
|
||||
<button id="rocwmma-impr-modal-open" type="button" class="chip small legend-pill legend-pill-rocwmma-improved">
|
||||
rocWMMA-improved
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<section class="controls">
|
||||
@@ -68,6 +85,56 @@
|
||||
<div id="tables"></div>
|
||||
</section>
|
||||
|
||||
<div id="hipblas-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="hipblas-title">
|
||||
<div class="modal-content">
|
||||
<button id="hipblas-modal-close" class="modal-close" aria-label="Close dialog">×</button>
|
||||
<h2 id="hipblas-title">hipBLASLt &amp; hblt0 explained</h2>
|
||||
<p>The ROCm toolboxes ship with <code>ROCBLAS_USE_HIPBLASLT=1</code> by default. This forces rocBLAS to prefer
|
||||
the hipBLASLt kernel library, which historically delivered the best throughput on gfx1151 (Strix Halo).</p>
|
||||
<p>Rows tagged with <code>__hblt0</code> were re-run with <code>ROCBLAS_USE_HIPBLASLT=0</code>, letting rocBLAS
|
||||
auto-select between hipBLASLt, Tensile, or other kernel providers. These runs show how performance shifts when
|
||||
the tuned hipBLASLt path is disabled.</p>
|
||||
<p>hipBLASLt is AMD's LT (low-level tuned) matmul backend, optimized for transformer workloads. Disabling it can
|
||||
expose regressions or improvements depending on driver versions, so both configurations are published for
|
||||
comparison.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="rpc-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rpc-title">
|
||||
<div class="modal-content">
|
||||
<button id="rpc-modal-close" class="modal-close" aria-label="Close dialog">×</button>
|
||||
<h2 id="rpc-title">RPC · dual server</h2>
|
||||
<p>These results were produced with two Strix Halo systems (Framework Desktop + HP G1a workstation, each 128 GB)
|
||||
connected over 5 Gbps Ethernet. One runs <code>rpc-server</code> from llama.cpp; the other runs
|
||||
<code>llama-bench --rpc</code>.</p>
|
||||
<p>This setup allows distributed inference, splitting large GGUF models across both machines. The metric shows what
|
||||
you can expect when latency is limited by the network and the workload is balanced between two RPC participants.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="rocwmma-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rocwmma-title">
|
||||
<div class="modal-content">
|
||||
<button id="rocwmma-modal-close" class="modal-close" aria-label="Close dialog">×</button>
|
||||
<h2 id="rocwmma-title">rocWMMA variants</h2>
|
||||
<p>Backends labeled <code>-rocwmma</code> are rebuilt with AMD's rocWMMA library, which unlocks matrix multiply
|
||||
pipelines accelerated via wave matrix multiply-accumulate (WMMA) instructions.</p>
|
||||
<p>rocWMMA kernels can significantly accelerate BF16/F16 workloads on RDNA3 but may trade stability or memory
|
||||
usage; comparing plain toolboxes against <code>-rocwmma</code> ones highlights the benefit or cost.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="rocwmma-impr-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rocwmma-impr-title">
|
||||
<div class="modal-content">
|
||||
<button id="rocwmma-impr-modal-close" class="modal-close" aria-label="Close dialog">×</button>
|
||||
<h2 id="rocwmma-impr-title">rocWMMA-improved builds</h2>
|
||||
<p>Toolboxes tagged <code>-rocwmma-improved</code> bake in an experimental llama.cpp patch that retunes rocWMMA
|
||||
kernels for long-context throughput on Strix Halo.</p>
|
||||
<p>Patch reference: <a href="https://github.com/hjc4869/llama.cpp/commit/12bb5c371bd3c647ef75e8e13de9e311edba604d"
|
||||
target="_blank" rel="noreferrer">12bb5c371bd3</a>. These builds often run faster for 32k+ contexts, but
|
||||
the changes are not upstream and may be unstable.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="assets/index2.js" type="module"></script>
|
||||
</body>
|
||||
|
||||
|
||||
+11055
-1
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user