feat: add MTP benchmark page with dedicated data summary and visualization components

This commit is contained in:
Donato Capitella
2026-05-15 11:51:02 +01:00
parent adf4d1a5fa
commit a1ef189df0
5 changed files with 404 additions and 0 deletions
+111
View File
@@ -0,0 +1,111 @@
/* Intro paragraph shown under the page heading. */
.description {
  margin-top: 12px;
  max-width: 800px;
  font-size: 13px;
  line-height: 1.5;
  color: var(--ink);
}

/* Links inside the intro use the accent colour and stay un-underlined at rest. */
.description a {
  text-decoration: none;
  color: var(--accent);
}

/* The underline appears only while the link is hovered. */
.description a:hover {
  text-decoration: underline;
}
/*
 * Centred content column: fills the available width up to 1050px.
 * NOTE(review): the !important flags presumably override margins set by the
 * shared stylesheet or by inline styles on the same elements - confirm before
 * removing them.
 */
.mtp-layout-inner {
  width: 100%;
  max-width: 1050px;
  margin-right: auto !important;
  margin-left: auto !important;
}
/* Results table: full width, with no minimum width of its own. */
.mtp-table {
  width: 100%;
  min-width: auto;
  font-size: 14px;
}

/* The table carries .hidden until the page script has filled it in. */
.mtp-table.hidden {
  display: none;
}

/* Header and body cells share the same padding. */
.mtp-table th,
.mtp-table td {
  padding: 12px 16px;
}

/* Header cells: left-aligned and bottom-aligned, so two-line headers line up. */
.mtp-table th {
  font-size: 13px;
  text-align: left;
  vertical-align: bottom;
}

/* Numeric columns are right-aligned in both header and body. */
.mtp-table th.metric-col,
.mtp-table td.metric-col {
  text-align: right;
}

.mtp-table th.metric-col {
  width: 120px;
}

/* Tabular figures keep digits the same width from row to row. */
.mtp-table td.metric-col {
  font-weight: 600;
  font-feature-settings: "tnum";
}

.mtp-table td.metric-col .measure {
  font-size: 15px;
}

/* Kept after the metric-col rules so the model width wins if a cell has both classes. */
.mtp-table th.model,
.mtp-table td.model {
  width: 320px;
}

/* Small uppercase unit/variant label on the second line of a header cell. */
.mtp-table th .sub {
  font-size: 11px;
  font-weight: 400;
  letter-spacing: 0.05em;
  text-transform: uppercase;
  color: var(--muted);
}
/* Layout shared by the speedup badge and the toolbox pill. */
.speedup-badge,
.toolbox-pill {
  display: inline-flex;
  align-items: center;
  padding: 2px 8px;
}

/* Speedup ratio badge; one of the three tier classes below supplies the colours. */
.speedup-badge {
  justify-content: center;
  border-radius: 6px;
  font-size: 12px;
  font-weight: 700;
}

/* Tier colours. The page script adds -high for ratios >= 1.8, -med for >= 1.3, -low otherwise. */
.speedup-high {
  color: #025333;
  background: #d7f5e3;
}

.speedup-med {
  color: #0a517a;
  background: #eef9ff;
}

.speedup-low {
  color: #9d174d;
  background: #fdf2f8;
}

/* Fully rounded pill showing the toolbox name; blue by default. */
.toolbox-pill {
  border-radius: 999px;
  font-size: 11px;
  color: #1d4ed8;
  background: #f1f5ff;
}

/* Pink variant, added by the page script when the toolbox name contains "vulkan" or "radv". */
.toolbox-pill.radv {
  color: #9d174d;
  background: #fdf2f8;
}
+128
View File
@@ -0,0 +1,128 @@
/**
 * Page bootstrap: fetch mtp-summary.json (relative to the page), render it
 * into the results table, reveal the table and update the status line.
 *
 * On any failure (network error, non-2xx response, invalid JSON, payload that
 * is not an array, or a rendering error) the table stays hidden, the error is
 * logged and the status line shows a failure message.
 *
 * @returns {Promise<void>}
 */
async function initMtpPage() {
  const statsLine = document.getElementById("stats-line");
  const table = document.getElementById("mtp-table");
  const tbody = document.getElementById("mtp-tbody");

  // Without these elements there is nowhere to render or report to; bail out
  // instead of throwing a second time from inside the catch block below.
  if (!statsLine || !table || !tbody) {
    console.error("MTP page markup is missing #stats-line, #mtp-table or #mtp-tbody");
    return;
  }

  try {
    const res = await fetch("mtp-summary.json");
    if (!res.ok) throw new Error("Network response was not ok");
    const data = await res.json();
    // renderTable iterates the payload, so reject anything that is not a list
    // with a clear message rather than an opaque "forEach is not a function".
    if (!Array.isArray(data)) throw new Error("mtp-summary.json must contain an array of runs");
    renderTable(data, tbody);
    table.classList.remove("hidden");
    statsLine.textContent = `Showing ${data.length} benchmark runs`;
  } catch (err) {
    console.error("Failed to load mtp-summary.json", err);
    statsLine.textContent = "Failed to load mtp-summary.json. Ensure the file is present in the docs folder.";
  }
}

// If the script runs after the document has finished parsing (for example when
// it is loaded async or injected dynamically), DOMContentLoaded has already
// fired and a listener would never run, so start immediately in that case.
if (document.readyState === "loading") {
  document.addEventListener("DOMContentLoaded", initMtpPage);
} else {
  initMtpPage();
}
/**
 * Render the benchmark runs as one table row per (model, toolbox) pair.
 *
 * Runs are grouped by model and toolbox; within a group the runs whose `mode`
 * is "baseline", "mtp-2" and "mtp-3" fill the matching columns (a later run
 * with the same mode replaces an earlier one, other modes are ignored). Rows
 * are sorted by model, then toolbox. Any existing content of `tbody` is
 * replaced.
 *
 * @param {Array<Object>} runs - Parsed benchmark records; each is read for
 *   `model`, `toolbox`, `mode` and `avg_tok_s`.
 * @param {HTMLElement} tbody - The table body element to fill.
 * @returns {void}
 */
function renderTable(runs, tbody) {
  const grouped = new Map();

  runs.forEach(run => {
    // The sort and pill logic below call string methods on model and toolbox;
    // skip a malformed record rather than letting it abort the whole table.
    if (!run || typeof run.model !== "string" || typeof run.toolbox !== "string") {
      console.warn("Skipping malformed benchmark run", run);
      return;
    }

    // JSON-encode the pair so names that contain the separator cannot collide.
    const key = JSON.stringify([run.model, run.toolbox]);
    let entry = grouped.get(key);
    if (!entry) {
      entry = {
        model: run.model,
        toolbox: run.toolbox,
        baseline: null,
        mtp2: null,
        mtp3: null
      };
      grouped.set(key, entry);
    }

    if (run.mode === "baseline") entry.baseline = run;
    else if (run.mode === "mtp-2") entry.mtp2 = run;
    else if (run.mode === "mtp-3") entry.mtp3 = run;
  });

  const rows = Array.from(grouped.values()).sort(
    (a, b) => a.model.localeCompare(b.model) || a.toolbox.localeCompare(b.toolbox)
  );

  // A missing run for a mode yields null, which the cell builders show as a dash.
  const speedOf = run => (run ? run.avg_tok_s : null);

  // Build all rows off-document and swap them in with a single DOM update.
  const fragment = document.createDocumentFragment();

  rows.forEach(row => {
    const tr = document.createElement("tr");

    // Model
    const tdModel = document.createElement("td");
    tdModel.className = "model";
    const modelHead = document.createElement("div");
    modelHead.className = "model-head";
    const nameSpan = document.createElement("span");
    nameSpan.className = "model-name";
    nameSpan.textContent = row.model;
    modelHead.appendChild(nameSpan);
    tdModel.appendChild(modelHead);
    tr.appendChild(tdModel);

    // Toolbox (Vulkan/RADV toolboxes get the alternate pill colour)
    const tdToolbox = document.createElement("td");
    const tbPill = document.createElement("span");
    tbPill.className = "toolbox-pill";
    if (row.toolbox.includes("vulkan") || row.toolbox.includes("radv")) {
      tbPill.classList.add("radv");
    }
    tbPill.textContent = row.toolbox;
    tdToolbox.appendChild(tbPill);
    tr.appendChild(tdToolbox);

    // Baseline, then each MTP variant followed by its speedup over baseline
    const baseSpeed = speedOf(row.baseline);
    const mtp2Speed = speedOf(row.mtp2);
    const mtp3Speed = speedOf(row.mtp3);

    tr.appendChild(makeMetricCell(baseSpeed));
    tr.appendChild(makeMetricCell(mtp2Speed));
    tr.appendChild(makeSpeedupCell(baseSpeed, mtp2Speed));
    tr.appendChild(makeMetricCell(mtp3Speed));
    tr.appendChild(makeSpeedupCell(baseSpeed, mtp3Speed));

    fragment.appendChild(tr);
  });

  tbody.replaceChildren(fragment);
}
/**
 * Build a right-aligned metric cell for a tokens-per-second value.
 *
 * A finite number is shown with one decimal place inside a `.measure` span.
 * Anything else (null, undefined, NaN, Infinity, or a non-number such as a
 * string) is shown as an em dash inside a `.cell-empty` span, so one bad value
 * in the data cannot throw and abort rendering of the whole table.
 *
 * @param {?number} val - Value to display, or null/undefined when there is none.
 * @returns {HTMLTableCellElement} A `<td class="metric-col">` holding one span.
 */
function makeMetricCell(val) {
  const td = document.createElement("td");
  td.className = "metric-col";

  // Build the span as a node and set its text, so no markup is ever parsed.
  const span = document.createElement("span");
  if (typeof val === "number" && Number.isFinite(val)) {
    span.className = "measure";
    span.textContent = val.toFixed(1);
  } else {
    span.className = "cell-empty";
    span.textContent = "—";
  }

  td.appendChild(span);
  return td;
}
/**
 * Build the "Speedup" cell comparing an MTP throughput against the baseline.
 *
 * When both values are truthy and the baseline is positive, the cell holds a
 * badge reading `<ratio>×` (two decimals) with a tier class: "speedup-high"
 * for ratios of at least 1.8, "speedup-med" for at least 1.3, and
 * "speedup-low" otherwise. In every other case the cell shows an em dash.
 *
 * @param {?number} base - Baseline tokens per second.
 * @param {?number} mtp - MTP tokens per second.
 * @returns {HTMLTableCellElement} A `<td class="metric-col">`.
 */
function makeSpeedupCell(base, mtp) {
  const cell = document.createElement("td");
  cell.className = "metric-col";

  // No ratio can be shown without both measurements and a positive baseline.
  const comparable = Boolean(base && mtp && base > 0);
  if (!comparable) {
    cell.innerHTML = `<span class="cell-empty">—</span>`;
    return cell;
  }

  const speedup = mtp / base;

  let tier = "speedup-low";
  if (speedup >= 1.8) {
    tier = "speedup-high";
  } else if (speedup >= 1.3) {
    tier = "speedup-med";
  }

  const pill = document.createElement("span");
  pill.className = "speedup-badge";
  pill.textContent = `${speedup.toFixed(2)}×`;
  pill.classList.add(tier);

  cell.appendChild(pill);
  return cell;
}
+3
View File
@@ -27,6 +27,9 @@
<button id="rocwmma-modal-open" type="button" class="chip small legend-pill legend-pill-rocwmma">
rocWMMA
</button>
<a href="mtp.html" class="chip small legend-pill" style="text-decoration: none; background: #eef9ff; color: #0a517a; border: 1px solid #c7e9ff; font-weight: 600;">
★ View Experimental MTP Benchmarks
</a>
</div>
</div>
</header>
+98
View File
@@ -0,0 +1,98 @@
[
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "rocm-7.2.3-mtp",
"mode": "baseline",
"avg_tok_s": 6.5,
"accept_rate": null,
"wall_s_total": 273.39
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "rocm-7.2.3-mtp",
"mode": "mtp-2",
"avg_tok_s": 12.4,
"accept_rate": 0.7971,
"wall_s_total": 147.31
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "rocm-7.2.3-mtp",
"mode": "mtp-3",
"avg_tok_s": 13.5,
"accept_rate": 0.744,
"wall_s_total": 135.2
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "vulkan-radv-mtp",
"mode": "baseline",
"avg_tok_s": 6.3,
"accept_rate": null,
"wall_s_total": 283.86
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "vulkan-radv-mtp",
"mode": "mtp-2",
"avg_tok_s": 11.7,
"accept_rate": 0.8024,
"wall_s_total": 159.41
},
{
"model": "Qwen3.6-27B-UD-Q8_K_XL",
"toolbox": "vulkan-radv-mtp",
"mode": "mtp-3",
"avg_tok_s": 13.3,
"accept_rate": 0.7301,
"wall_s_total": 141.74
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "rocm-7.2.3-mtp",
"mode": "baseline",
"avg_tok_s": 48.7,
"accept_rate": null,
"wall_s_total": 37.55
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "rocm-7.2.3-mtp",
"mode": "mtp-2",
"avg_tok_s": 64.5,
"accept_rate": 0.7958,
"wall_s_total": 29.33
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "rocm-7.2.3-mtp",
"mode": "mtp-3",
"avg_tok_s": 68.3,
"accept_rate": 0.7386,
"wall_s_total": 27.83
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "vulkan-radv-mtp",
"mode": "baseline",
"avg_tok_s": 58.7,
"accept_rate": null,
"wall_s_total": 31.93
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "vulkan-radv-mtp",
"mode": "mtp-2",
"avg_tok_s": 72.8,
"accept_rate": 0.7907,
"wall_s_total": 26.85
},
{
"model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
"toolbox": "vulkan-radv-mtp",
"mode": "mtp-3",
"avg_tok_s": 74.6,
"accept_rate": 0.7374,
"wall_s_total": 26.36
}
]
+64
View File
@@ -0,0 +1,64 @@
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>AMD Strix Halo — MTP Benchmark Results</title>
  <link rel="stylesheet" href="assets/index2.css">
  <link rel="stylesheet" href="assets/mtp.css?v=2">
  <script defer data-domain="kyuz0.github.io/amd-strix-halo-toolboxes" src="https://plausible.skybound.link/js/plausible.js"></script>
</head>
<body>
  <header>
    <div class="mtp-layout-inner">
      <h1>AMD Ryzen AI MAX+ 395 “Strix Halo” — MTP Benchmarks</h1>
      <p>Framework Desktop · AMD Ryzen AI MAX+ 395 · 128GB unified RAM</p>
      <p class="description">
        Multi-Token Prediction (MTP) is an experimental speculative decoding feature for <code>llama.cpp</code>
        (see <a href="https://github.com/ggml-org/llama.cpp/pull/22673" target="_blank" rel="noopener noreferrer">PR #22673</a>).
        It allows supported models to predict multiple tokens per forward pass, significantly increasing generation speed.
        These benchmarks compare the baseline generation speed against MTP with 2-token and 3-token drafts.
      </p>
    </div>
  </header>

  <section class="panel compact">
    <div class="panel-split mtp-layout-inner">
      <div class="stats-box" style="margin-left: 0;">
        <!-- Updated by assets/mtp.js once the data loads; role="status" lets
             assistive technology announce the change. -->
        <div class="stat-line" id="stats-line" role="status">Loading results...</div>
      </div>
      <div class="actions" style="margin-left: auto;">
        <a href="index.html" class="chip small" style="text-decoration: none;">← Back to Main Benchmarks</a>
      </div>
    </div>
  </section>

  <section class="panel compact" id="tables-panel" style="border-bottom: none; background: transparent;">
    <div class="table-wrap mtp-layout-inner" style="margin-top: 16px; margin-bottom: 32px; background: var(--card);">
      <div class="table-scroll">
        <!-- Starts hidden; assets/mtp.js removes the "hidden" class after
             filling the tbody. -->
        <table id="mtp-table" class="mtp-table hidden" aria-label="MTP benchmark results by model and toolbox">
          <thead>
            <tr>
              <th class="model" scope="col">Model</th>
              <th scope="col">Toolbox</th>
              <th class="metric-col" scope="col">Baseline<br><span class="sub">tok/s</span></th>
              <th class="metric-col" scope="col">MTP-2<br><span class="sub">tok/s</span></th>
              <th class="metric-col" scope="col">Speedup<br><span class="sub">MTP-2</span></th>
              <th class="metric-col" scope="col">MTP-3<br><span class="sub">tok/s</span></th>
              <th class="metric-col" scope="col">Speedup<br><span class="sub">MTP-3</span></th>
            </tr>
          </thead>
          <tbody id="mtp-tbody">
            <!-- Rows populated by JS -->
          </tbody>
        </table>
      </div>
    </div>
  </section>

  <script src="assets/mtp.js" type="module"></script>
</body>
</html>