updated benchmarks

This commit is contained in:
Donato Capitella
2025-11-17 23:02:56 +00:00
parent ad32126872
commit 1d6d48fae1
192 changed files with 13571 additions and 107 deletions
+164 -1
View File
@@ -149,6 +149,7 @@ select {
gap: 6px;
font-size: 12px;
color: var(--ink);
text-transform: none;
}
.backend-item input {
@@ -161,10 +162,29 @@ select {
border-radius: 999px;
background: #eef2ff;
color: #1d3ea5;
text-transform: uppercase;
transform: translateY(-2px);
}
/* Per-tag colour variants for the pills shown next to a backend name.
   Each rule only overrides background and text colour; the modifier class
   has the form tag-<name>. */

/* hblt0 tag. */
.backend-item .tag.tag-hblt0 {
background: #e9edff;
color: #1d3ea5;
}
/* rocwmma tag. */
.backend-item .tag.tag-rocwmma {
background: #eef9ff;
color: #0a517a;
}
/* rocwmma-improved tag. */
.backend-item .tag.tag-rocwmma-improved {
background: #faf3ff;
color: #6b1fb7;
}
/* improved tag. */
.backend-item .tag.tag-improved {
background: #fef9e7;
color: #8a5a00;
}
.stats-box {
margin-left: auto;
display: flex;
@@ -269,6 +289,142 @@ td.model {
font-weight: 500;
}
/* Header line inside a model cell: lays its children out in a row,
   vertically centred, wrapping onto further lines when space runs out. */
td.model .model-head {
display: flex;
align-items: center;
flex-wrap: wrap;
gap: 6px;
}
/* Base pill badge: small uppercase label with fully rounded corners.
   The border is transparent here so that variants can set a border colour
   without changing the pill's dimensions. */
.model-pill {
display: inline-flex;
align-items: center;
padding: 2px 8px;
border-radius: 999px;
font-size: 10px;
text-transform: uppercase;
letter-spacing: 0.05em;
background: #eceff5;
color: #27303f;
border: 1px solid transparent;
}
/* RPC variant of the pill (same colours as .legend-pill-rpc). */
.model-pill-rpc {
background: #fdf2f8;
border-color: #fbcfe8;
color: #9d174d;
}
/* rocWMMA variant of the pill (same colours as .legend-pill-rocwmma). */
.model-pill-rocwmma {
background: #eef9ff;
border-color: #c7e9ff;
color: #0a517a;
}
/* Legend container: stacks its children vertically with a small gap. */
.legend {
display: flex;
flex-direction: column;
gap: 6px;
margin-top: 8px;
}
/* Caption of the legend: small, uppercase, muted colour. */
.legend label {
font-size: 10px;
text-transform: uppercase;
letter-spacing: 0.06em;
color: var(--muted);
}
/* Row of legend pills; wraps onto further lines when space runs out. */
.legend-pills {
display: flex;
flex-wrap: wrap;
gap: 8px;
}
/* Base legend pill. The transparent border lets the variants below add a
   border colour without changing the pill's dimensions. */
.legend-pill {
display: inline-flex;
align-items: center;
gap: 4px;
border-radius: 999px;
border: 1px solid transparent;
background: #e9edff;
color: var(--ink);
}
/* Default variant: restates the base pill's background and text colour. */
.legend-pill-default {
background: #e9edff;
color: var(--ink);
}
/* RPC variant (same colours as .model-pill-rpc). */
.legend-pill-rpc {
background: #fdf2f8;
border-color: #fbcfe8;
color: #9d174d;
}
/* rocWMMA variant (same colours as .model-pill-rocwmma). */
.legend-pill-rocwmma {
background: #eef9ff;
border-color: #c7e9ff;
color: #0a517a;
}
/* rocWMMA-improved variant. */
.legend-pill-rocwmma-improved {
background: #faf3ff;
border-color: #e0c8ff;
color: #6b1fb7;
}
/* Hidden state of a modal. The two-class selector is more specific than the
   plain .modal rule below, so its display: flex does not override this
   even though it appears later in the stylesheet. */
.modal.hidden {
display: none;
}
/* Modal backdrop: fixed-position layer stretched over the whole viewport
   (inset: 0) with a half-transparent black fill; the flex settings centre
   its child both horizontally and vertically. */
.modal {
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.5);
display: flex;
align-items: center;
justify-content: center;
padding: 20px;
z-index: 1000;
}
/* Dialog panel: white card that fills the available width up to 520px.
   position: relative makes it the positioning context for the absolutely
   positioned .modal-close button. */
.modal-content {
background: #fff;
border-radius: 12px;
padding: 20px 24px;
max-width: 520px;
width: 100%;
box-shadow: 0 12px 50px rgba(0, 0, 0, 0.2);
position: relative;
font-size: 13px;
line-height: 1.4;
}
/* Dialog heading; the top margin is removed so it sits against the panel padding. */
.modal-content h2 {
margin-top: 0;
font-size: 16px;
}
/* Paragraph spacing inside the dialog. */
.modal-content p {
margin: 8px 0;
}
/* Close button: borderless, transparent, pinned to the top-right corner of
   its positioned ancestor. */
.modal-close {
position: absolute;
top: 8px;
right: 10px;
border: none;
background: transparent;
font-size: 20px;
cursor: pointer;
color: var(--muted);
}
/* Switch the close button from the muted to the ink colour on hover. */
.modal-close:hover {
color: var(--ink);
}
.data-cell {
white-space: normal;
position: relative;
@@ -501,3 +657,10 @@ th.backend-header.drop-target {
color: var(--muted);
margin-top: 4px;
}
/* Inline code inside a dialog: monospace font stack on a light background
   chip with slightly rounded corners. */
.modal-content code {
font-family: "JetBrains Mono", "SFMono-Regular", Consolas, monospace;
background: #f6f8fc;
padding: 1px 4px;
border-radius: 4px;
font-size: 12px;
}
+89 -9
View File
@@ -25,6 +25,7 @@ const state = {
document.addEventListener("DOMContentLoaded", async () => {
cacheUI();
setupModals();
try {
const res = await fetch("results.json");
const data = await res.json();
@@ -53,9 +54,62 @@ function cacheUI() {
stats: document.getElementById("stats-line"),
resetBtn: document.getElementById("reset-layout"),
tables: document.getElementById("tables"),
hipblasModalOpen: document.getElementById("hipblas-modal-open"),
hipblasModal: document.getElementById("hipblas-modal"),
hipblasModalClose: document.getElementById("hipblas-modal-close"),
rpcModalOpen: document.getElementById("rpc-modal-open"),
rpcModal: document.getElementById("rpc-modal"),
rpcModalClose: document.getElementById("rpc-modal-close"),
rocwmmaModalOpen: document.getElementById("rocwmma-modal-open"),
rocwmmaModal: document.getElementById("rocwmma-modal"),
rocwmmaModalClose: document.getElementById("rocwmma-modal-close"),
rocwmmaImprModalOpen: document.getElementById("rocwmma-impr-modal-open"),
rocwmmaImprModal: document.getElementById("rocwmma-impr-modal"),
rocwmmaImprModalClose: document.getElementById("rocwmma-impr-modal-close"),
};
}
/**
 * Attaches open/close behaviour to the four info dialogs.
 *
 * For each dialog the trigger, dialog and close-button elements are read from
 * `state.ui` under the keys `<prefix>ModalOpen`, `<prefix>Modal` and
 * `<prefix>ModalClose`. A dialog is shown by removing its "hidden" class and
 * dismissed by adding it back. Dismissal happens on a click on the close
 * button, a click whose target is the dialog element itself, or an Escape key
 * press while the dialog is not hidden.
 *
 * Dialogs whose trigger or dialog element is missing are skipped; a missing
 * close button is tolerated.
 */
function setupModals() {
  const { ui } = state;
  const prefixes = ["hipblas", "rpc", "rocwmma", "rocwmmaImpr"];

  for (const prefix of prefixes) {
    const trigger = ui[`${prefix}ModalOpen`];
    const dialog = ui[`${prefix}Modal`];
    const dismissButton = ui[`${prefix}ModalClose`];

    if (!trigger || !dialog) {
      continue;
    }

    const show = () => {
      dialog.classList.remove("hidden");
    };
    const hide = () => {
      dialog.classList.add("hidden");
    };

    trigger.addEventListener("click", show);
    if (dismissButton) {
      dismissButton.addEventListener("click", hide);
    }

    // Only a click that lands on the dialog element itself dismisses it;
    // clicks on its descendants have a different event target.
    dialog.addEventListener("click", (event) => {
      if (event.target === dialog) {
        hide();
      }
    });

    document.addEventListener("keydown", (event) => {
      if (event.key !== "Escape") return;
      if (dialog.classList.contains("hidden")) return;
      hide();
    });
  }
}
function prepareData(runs) {
const contextMap = new Map();
const envSet = new Set();
@@ -132,6 +186,7 @@ function ensureModel(testEntry, modelName, run) {
quant: (run.quant || "Unknown").toUpperCase(),
sizeB: run.name_params_b ?? run.params_b ?? null,
backends: {},
isRpc: Boolean(run.rpc),
search_blob: [modelName, run.quant, run.env, run.test]
.filter(Boolean)
.map((s) => s.toString().toLowerCase())
@@ -147,6 +202,12 @@ function ensureModel(testEntry, modelName, run) {
state.sizeStats.min = Math.min(state.sizeStats.min, row.sizeB);
state.sizeStats.max = Math.max(state.sizeStats.max, row.sizeB);
}
if (run.rpc) {
row.isRpc = true;
if (!row.search_blob.includes("rpc")) {
row.search_blob = `${row.search_blob} rpc`;
}
}
return row;
}
@@ -259,6 +320,8 @@ function renderBackendList() {
const pill = document.createElement("span");
pill.className = "tag";
pill.textContent = tag;
const safeTag = tag.replace(/[^a-z0-9]+/gi, "-").toLowerCase();
pill.classList.add(`tag-${safeTag}`);
label.appendChild(pill);
});
@@ -393,7 +456,24 @@ function buildSingleTable(models, backendList) {
const tr = document.createElement("tr");
const tdModel = document.createElement("td");
tdModel.className = "model";
tdModel.innerHTML = `<div>${model.model}</div><div class="meta">${model.quant} · ${formatSize(model.sizeB)}</div>`;
const head = document.createElement("div");
head.className = "model-head";
const nameSpan = document.createElement("span");
nameSpan.className = "model-name";
nameSpan.textContent = model.model;
head.appendChild(nameSpan);
if (model.isRpc) {
const pill = document.createElement("span");
pill.className = "model-pill model-pill-rpc";
pill.title = "Run executed via llama.cpp RPC across two servers";
pill.textContent = "RPC · dual server";
head.appendChild(pill);
}
tdModel.appendChild(head);
const meta = document.createElement("div");
meta.className = "meta";
meta.textContent = `${model.quant} · ${formatSize(model.sizeB)}`;
tdModel.appendChild(meta);
const actionWrap = document.createElement("div");
actionWrap.className = "row-actions";
@@ -586,14 +666,14 @@ function backendValue(entry, direction) {
}
/**
 * Splits an environment name into a display base name and its variant tags.
 *
 * Underscores in the name are first replaced with dots. Every occurrence of
 * `-rocwmma-improved`, `-rocwmma`, `-improved` or `-hblt0` (case-insensitive)
 * is then collected as a lower-cased tag and removed from the base name.
 *
 * @param {string} env - Environment name.
 * @returns {{ base: string, tags: string[] }} The name with all tag suffixes
 *   removed, and the tags in order of appearance (empty when none match).
 */
function splitEnvName(env) {
  const canonical = env.replace(/_/g, ".");
  // "rocwmma-improved" is listed before "rocwmma" and "improved" so the
  // compound tag is captured whole instead of as two separate tags.
  const tagPattern = /-(rocwmma-improved|rocwmma|improved|hblt0)/gi;
  const tags = Array.from(canonical.matchAll(tagPattern), (match) =>
    match[1].toLowerCase(),
  );
  const base = canonical.replace(tagPattern, "");
  return { base, tags };
}
+67
View File
@@ -15,6 +15,23 @@
<p>Fedora 42 · Linux 6.18.0-0.rc5.243.vanilla.fc42.x86_64 · llama.cpp build 1c398dc9e (7034)</p>
<p>Benchmarks captured 14 Nov 2025 · Repo: <a href="https://github.com/kyuz0/amd-strix-halo-toolboxes"
target="_blank" rel="noreferrer">kyuz0/amd-strix-halo-toolboxes</a></p>
<div class="legend">
<label>Legend</label>
<div class="legend-pills">
<button id="hipblas-modal-open" type="button" class="chip small legend-pill legend-pill-default">
hipBLASLt vs hblt0
</button>
<button id="rpc-modal-open" type="button" class="chip small legend-pill legend-pill-rpc">
RPC · dual server
</button>
<button id="rocwmma-modal-open" type="button" class="chip small legend-pill legend-pill-rocwmma">
rocWMMA
</button>
<button id="rocwmma-impr-modal-open" type="button" class="chip small legend-pill legend-pill-rocwmma-improved">
rocWMMA-improved
</button>
</div>
</div>
</header>
<section class="controls">
@@ -68,6 +85,56 @@
<div id="tables"></div>
</section>
<div id="hipblas-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="hipblas-title">
<div class="modal-content">
<button id="hipblas-modal-close" class="modal-close" aria-label="Close dialog">×</button>
<h2 id="hipblas-title">hipBLASLt &amp; hblt0 explained</h2>
<p>The ROCm toolboxes ship with <code>ROCBLAS_USE_HIPBLASLT=1</code> by default. This forces rocBLAS to prefer
the hipBLASLt kernel library, which historically delivered the best throughput on gfx1151 (Strix Halo).</p>
<p>Rows tagged with <code>__hblt0</code> were re-run with <code>ROCBLAS_USE_HIPBLASLT=0</code>, letting rocBLAS
auto-select between hipBLASLt, Tensile, or other kernel providers. These runs show how performance shifts when
the tuned hipBLASLt path is disabled.</p>
<p>hipBLASLt is AMD's lightweight GEMM (matrix-multiply) library, the ROCm counterpart to NVIDIA's cuBLASLt, and its
kernels are well suited to transformer workloads. Disabling it can expose regressions or improvements depending on
driver versions, so both configurations are published for comparison.</p>
</div>
</div>
<div id="rpc-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rpc-title">
<div class="modal-content">
<button id="rpc-modal-close" class="modal-close" aria-label="Close dialog">×</button>
<h2 id="rpc-title">RPC · dual server</h2>
<p>These results were produced with two Strix Halo systems (Framework Desktop + HP G1a workstation, each 128&nbsp;GB)
connected over 5&nbsp;Gbps Ethernet. One runs <code>rpc-server</code> from llama.cpp; the other runs
<code>llama-bench --rpc</code>.</p>
<p>This setup allows distributed inference, splitting large GGUF models across both machines. The metric shows what
you can expect when latency is limited by the network and the workload is balanced between two RPC participants.</p>
</div>
</div>
<div id="rocwmma-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rocwmma-title">
<div class="modal-content">
<button id="rocwmma-modal-close" class="modal-close" aria-label="Close dialog">×</button>
<h2 id="rocwmma-title">rocWMMA variants</h2>
<p>Backends labeled <code>-rocwmma</code> are rebuilt with AMD's rocWMMA library, which unlocks matrix multiply
pipelines accelerated via wave matrix multiply-accumulate (WMMA) instructions.</p>
<p>rocWMMA kernels can significantly accelerate BF16/F16 workloads on RDNA3 but may trade stability or memory
usage; comparing plain toolboxes against <code>-rocwmma</code> ones highlights the benefit or cost.</p>
</div>
</div>
<div id="rocwmma-impr-modal" class="modal hidden" role="dialog" aria-modal="true" aria-labelledby="rocwmma-impr-title">
<div class="modal-content">
<button id="rocwmma-impr-modal-close" class="modal-close" aria-label="Close dialog">×</button>
<h2 id="rocwmma-impr-title">rocWMMA-improved builds</h2>
<p>Toolboxes tagged <code>-rocwmma-improved</code> bake in an experimental llama.cpp patch that retunes rocWMMA
kernels for long-context throughput on Strix Halo.</p>
<p>Patch reference: <a href="https://github.com/hjc4869/llama.cpp/commit/12bb5c371bd3c647ef75e8e13de9e311edba604d"
target="_blank" rel="noreferrer">12bb5c371bd3</a>. These builds often run faster for 32k+ contexts, but
the changes are not upstream and may be unstable.</p>
</div>
</div>
<script src="assets/index2.js" type="module"></script>
</body>
+11055 -1
View File
File diff suppressed because it is too large Load Diff