feat: add MTP benchmark page with dedicated data summary and visualization components

2026-05-15 11:51:02 +01:00
parent adf4d1a5fa
commit a1ef189df0
5 changed files with 404 additions and 0 deletions
@@ -0,0 +1,111 @@
+/* Intro paragraph shown under the page header: small body text, capped in
+   width so long lines stay readable. */
+.description {
+    font-size: 13px;
+    color: var(--ink);
+    max-width: 800px;
+    line-height: 1.5;
+    margin-top: 12px;
+}
+
+/* Links inside the intro use the accent color and are underlined only on hover. */
+.description a {
+    color: var(--accent);
+    text-decoration: none;
+}
+
+.description a:hover {
+    text-decoration: underline;
+}
+
+/* Horizontally centered content column, at most 1050px wide.
+   NOTE(review): the !important margins presumably override margins coming
+   from the shared stylesheet, which is not visible here; confirm before removing. */
+.mtp-layout-inner {
+    max-width: 1050px;
+    margin-left: auto !important;
+    margin-right: auto !important;
+    width: 100%;
+}
+
+/* Results table: fills its container. */
+.mtp-table {
+    width: 100%;
+    min-width: auto;
+    font-size: 14px;
+}
+
+/* The table starts with the `hidden` class in the markup; the page script
+   removes it once the benchmark data has been rendered. */
+.mtp-table.hidden {
+    display: none;
+}
+
+/* Header cells: left-aligned by default, bottom-aligned so two-line headers
+   (label + unit) line up with single-line ones. */
+.mtp-table th {
+    text-align: left;
+    vertical-align: bottom;
+    padding: 12px 16px;
+    font-size: 13px;
+}
+
+.mtp-table td {
+    padding: 12px 16px;
+}
+
+/* Numeric columns (throughput and speedup) are right-aligned and fixed-width. */
+.mtp-table th.metric-col {
+    text-align: right;
+    width: 120px;
+}
+
+/* "tnum" requests tabular (fixed-width) digits so numbers align across rows. */
+.mtp-table td.metric-col {
+    text-align: right;
+    font-feature-settings: "tnum";
+    font-weight: 600;
+}
+
+/* The formatted tok/s value inside a metric cell. */
+.mtp-table td.metric-col .measure {
+    font-size: 15px;
+}
+
+/* Model name column. */
+.mtp-table th.model, .mtp-table td.model {
+    width: 320px;
+}
+
+/* Second header line (unit or variant label), rendered as a muted uppercase caption. */
+.mtp-table th .sub {
+    font-weight: 400;
+    font-size: 11px;
+    color: var(--muted);
+    text-transform: uppercase;
+    letter-spacing: 0.05em;
+}
+
+/* Badge showing the MTP-vs-baseline speedup ratio. The script adds exactly one
+   tier class next to `speedup-badge`:
+   ratio >= 1.8 -> speedup-high, ratio >= 1.3 -> speedup-med, otherwise speedup-low. */
+.speedup-badge {
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    padding: 2px 8px;
+    border-radius: 6px;
+    font-size: 12px;
+    font-weight: 700;
+}
+
+/* Green tier. */
+.speedup-high {
+    background: #d7f5e3;
+    color: #025333;
+}
+
+/* Blue tier. */
+.speedup-med {
+    background: #eef9ff;
+    color: #0a517a;
+}
+
+/* Pink tier. */
+.speedup-low {
+    background: #fdf2f8;
+    color: #9d174d;
+}
+
+/* Fully rounded pill showing the toolbox name; blue by default. */
+.toolbox-pill {
+    display: inline-flex;
+    align-items: center;
+    padding: 2px 8px;
+    border-radius: 999px;
+    font-size: 11px;
+    background: #f1f5ff;
+    color: #1d4ed8;
+}
+
+/* Pink variant; the script adds `radv` when the toolbox name contains
+   "vulkan" or "radv". */
+.toolbox-pill.radv {
+    background: #fdf2f8;
+    color: #9d174d;
+}
@@ -0,0 +1,128 @@
+document.addEventListener("DOMContentLoaded", async () => {
+    const statsLine = document.getElementById("stats-line");
+    const table = document.getElementById("mtp-table");
+    const tbody = document.getElementById("mtp-tbody");
+
+    try {
+        const res = await fetch("mtp-summary.json");
+        if (!res.ok) throw new Error("Network response was not ok");
+        const data = await res.json();
+        
+        renderTable(data, tbody);
+        table.classList.remove("hidden");
+        statsLine.textContent = `Showing ${data.length} benchmark runs`;
+
+    } catch (err) {
+        console.error("Failed to load mtp-summary.json", err);
+        statsLine.textContent = "Failed to load mtp-summary.json. Ensure the file is present in the docs folder.";
+    }
+});
+
+function renderTable(runs, tbody) {
+    // Group by model and toolbox
+    const grouped = new Map();
+    
+    runs.forEach(run => {
+        const key = `${run.model}|${run.toolbox}`;
+        if (!grouped.has(key)) {
+            grouped.set(key, {
+                model: run.model,
+                toolbox: run.toolbox,
+                baseline: null,
+                mtp2: null,
+                mtp3: null
+            });
+        }
+        
+        const entry = grouped.get(key);
+        if (run.mode === "baseline") entry.baseline = run;
+        if (run.mode === "mtp-2") entry.mtp2 = run;
+        if (run.mode === "mtp-3") entry.mtp3 = run;
+    });
+
+    const rows = Array.from(grouped.values()).sort((a, b) => {
+        if (a.model !== b.model) return a.model.localeCompare(b.model);
+        return a.toolbox.localeCompare(b.toolbox);
+    });
+
+    tbody.innerHTML = "";
+    
+    rows.forEach(row => {
+        const tr = document.createElement("tr");
+
+        // Model
+        const tdModel = document.createElement("td");
+        tdModel.className = "model";
+        const modelHead = document.createElement("div");
+        modelHead.className = "model-head";
+        const nameSpan = document.createElement("span");
+        nameSpan.className = "model-name";
+        nameSpan.textContent = row.model;
+        modelHead.appendChild(nameSpan);
+        tdModel.appendChild(modelHead);
+        tr.appendChild(tdModel);
+
+        // Toolbox
+        const tdToolbox = document.createElement("td");
+        const tbPill = document.createElement("span");
+        tbPill.className = "toolbox-pill";
+        if (row.toolbox.includes("vulkan") || row.toolbox.includes("radv")) {
+            tbPill.classList.add("radv");
+        }
+        tbPill.textContent = row.toolbox;
+        tdToolbox.appendChild(tbPill);
+        tr.appendChild(tdToolbox);
+
+        // Baseline
+        const baseSpeed = row.baseline ? row.baseline.avg_tok_s : null;
+        tr.appendChild(makeMetricCell(baseSpeed));
+
+        // MTP-2
+        const mtp2Speed = row.mtp2 ? row.mtp2.avg_tok_s : null;
+        tr.appendChild(makeMetricCell(mtp2Speed));
+        tr.appendChild(makeSpeedupCell(baseSpeed, mtp2Speed));
+
+        // MTP-3
+        const mtp3Speed = row.mtp3 ? row.mtp3.avg_tok_s : null;
+        tr.appendChild(makeMetricCell(mtp3Speed));
+        tr.appendChild(makeSpeedupCell(baseSpeed, mtp3Speed));
+
+        tbody.appendChild(tr);
+    });
+}
+
+function makeMetricCell(val) {
+    const td = document.createElement("td");
+    td.className = "metric-col";
+    if (val !== null && val !== undefined) {
+        td.innerHTML = `<span class="measure">${val.toFixed(1)}</span>`;
+    } else {
+        td.innerHTML = `<span class="cell-empty">—</span>`;
+    }
+    return td;
+}
+
+function makeSpeedupCell(base, mtp) {
+    const td = document.createElement("td");
+    td.className = "metric-col";
+    
+    if (base && mtp && base > 0) {
+        const ratio = mtp / base;
+        const badge = document.createElement("span");
+        badge.className = "speedup-badge";
+        badge.textContent = `${ratio.toFixed(2)}×`;
+        
+        if (ratio >= 1.8) {
+            badge.classList.add("speedup-high");
+        } else if (ratio >= 1.3) {
+            badge.classList.add("speedup-med");
+        } else {
+            badge.classList.add("speedup-low");
+        }
+        
+        td.appendChild(badge);
+    } else {
+        td.innerHTML = `<span class="cell-empty">—</span>`;
+    }
+    return td;
+}
@@ -27,6 +27,9 @@
                <button id="rocwmma-modal-open" type="button" class="chip small legend-pill legend-pill-rocwmma">
                    rocWMMA
                </button>
+                <a href="mtp.html" class="chip small legend-pill" style="text-decoration: none; background: #eef9ff; color: #0a517a; border: 1px solid #c7e9ff; font-weight: 600;">
+                    ★ View Experimental MTP Benchmarks
+                </a>
            </div>
        </div>
    </header>
@@ -0,0 +1,98 @@
+[
+  {
+    "model": "Qwen3.6-27B-UD-Q8_K_XL",
+    "toolbox": "rocm-7.2.3-mtp",
+    "mode": "baseline",
+    "avg_tok_s": 6.5,
+    "accept_rate": null,
+    "wall_s_total": 273.39
+  },
+  {
+    "model": "Qwen3.6-27B-UD-Q8_K_XL",
+    "toolbox": "rocm-7.2.3-mtp",
+    "mode": "mtp-2",
+    "avg_tok_s": 12.4,
+    "accept_rate": 0.7971,
+    "wall_s_total": 147.31
+  },
+  {
+    "model": "Qwen3.6-27B-UD-Q8_K_XL",
+    "toolbox": "rocm-7.2.3-mtp",
+    "mode": "mtp-3",
+    "avg_tok_s": 13.5,
+    "accept_rate": 0.744,
+    "wall_s_total": 135.2
+  },
+  {
+    "model": "Qwen3.6-27B-UD-Q8_K_XL",
+    "toolbox": "vulkan-radv-mtp",
+    "mode": "baseline",
+    "avg_tok_s": 6.3,
+    "accept_rate": null,
+    "wall_s_total": 283.86
+  },
+  {
+    "model": "Qwen3.6-27B-UD-Q8_K_XL",
+    "toolbox": "vulkan-radv-mtp",
+    "mode": "mtp-2",
+    "avg_tok_s": 11.7,
+    "accept_rate": 0.8024,
+    "wall_s_total": 159.41
+  },
+  {
+    "model": "Qwen3.6-27B-UD-Q8_K_XL",
+    "toolbox": "vulkan-radv-mtp",
+    "mode": "mtp-3",
+    "avg_tok_s": 13.3,
+    "accept_rate": 0.7301,
+    "wall_s_total": 141.74
+  },
+  {
+    "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
+    "toolbox": "rocm-7.2.3-mtp",
+    "mode": "baseline",
+    "avg_tok_s": 48.7,
+    "accept_rate": null,
+    "wall_s_total": 37.55
+  },
+  {
+    "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
+    "toolbox": "rocm-7.2.3-mtp",
+    "mode": "mtp-2",
+    "avg_tok_s": 64.5,
+    "accept_rate": 0.7958,
+    "wall_s_total": 29.33
+  },
+  {
+    "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
+    "toolbox": "rocm-7.2.3-mtp",
+    "mode": "mtp-3",
+    "avg_tok_s": 68.3,
+    "accept_rate": 0.7386,
+    "wall_s_total": 27.83
+  },
+  {
+    "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
+    "toolbox": "vulkan-radv-mtp",
+    "mode": "baseline",
+    "avg_tok_s": 58.7,
+    "accept_rate": null,
+    "wall_s_total": 31.93
+  },
+  {
+    "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
+    "toolbox": "vulkan-radv-mtp",
+    "mode": "mtp-2",
+    "avg_tok_s": 72.8,
+    "accept_rate": 0.7907,
+    "wall_s_total": 26.85
+  },
+  {
+    "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL",
+    "toolbox": "vulkan-radv-mtp",
+    "mode": "mtp-3",
+    "avg_tok_s": 74.6,
+    "accept_rate": 0.7374,
+    "wall_s_total": 26.36
+  }
+]
@@ -0,0 +1,64 @@
+<!doctype html>
+<html lang="en">
+
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>AMD Strix Halo — MTP Benchmark Results</title>
+    <link rel="stylesheet" href="assets/index2.css">
+    <link rel="stylesheet" href="assets/mtp.css?v=2">
+    <script defer data-domain="kyuz0.github.io/amd-strix-halo-toolboxes" src="https://plausible.skybound.link/js/plausible.js"></script>
+</head>
+
+<body>
+    <header>
+        <div class="mtp-layout-inner">
+            <h1>AMD Ryzen AI MAX+ 395 “Strix Halo” — MTP Benchmarks</h1>
+            <p>Framework Desktop · AMD Ryzen AI MAX+ 395 · 128GB unified RAM</p>
+            <p class="description">
+                Multi-Token Prediction (MTP) is an experimental speculative decoding feature for <code>llama.cpp</code>
+                (see <a href="https://github.com/ggml-org/llama.cpp/pull/22673" target="_blank" rel="noreferrer">PR #22673</a>).
+                It allows supported models to predict multiple tokens per forward pass, significantly increasing generation speed.
+                These benchmarks compare the baseline generation speed against MTP with 2-token and 3-token drafts.
+            </p>
+        </div>
+    </header>
+
+    <section class="panel compact">
+        <div class="panel-split mtp-layout-inner">
+            <div class="stats-box" style="margin-left: 0;">
+                <div class="stat-line" id="stats-line">Loading results...</div>
+            </div>
+            <div class="actions" style="margin-left: auto;">
+                <a href="index.html" class="chip small" style="text-decoration: none;">← Back to Main Benchmarks</a>
+            </div>
+        </div>
+    </section>
+
+    <section class="panel compact" id="tables-panel" style="border-bottom: none; background: transparent;">
+        <div class="table-wrap mtp-layout-inner" style="margin-top: 16px; margin-bottom: 32px; background: var(--card);">
+            <div class="table-scroll">
+                <table id="mtp-table" class="mtp-table hidden">
+                    <thead>
+                        <tr>
+                            <th class="model">Model</th>
+                            <th>Toolbox</th>
+                            <th class="metric-col">Baseline<br><span class="sub">tok/s</span></th>
+                            <th class="metric-col">MTP-2<br><span class="sub">tok/s</span></th>
+                            <th class="metric-col">Speedup<br><span class="sub">MTP-2</span></th>
+                            <th class="metric-col">MTP-3<br><span class="sub">tok/s</span></th>
+                            <th class="metric-col">Speedup<br><span class="sub">MTP-3</span></th>
+                        </tr>
+                    </thead>
+                    <tbody id="mtp-tbody">
+                        <!-- Rows populated by JS -->
+                    </tbody>
+                </table>
+            </div>
+        </div>
+    </section>
+
+    <script src="assets/mtp.js" type="module"></script>
+</body>
+
+</html>