feat: add interactive granular benchmark details to UI and update MTP summary data format
This commit is contained in:
+14
-2
@@ -87,21 +87,33 @@ def run(args):
|
|||||||
# OpenAI-compatible endpoint: timings are in usage or top-level
|
# OpenAI-compatible endpoint: timings are in usage or top-level
|
||||||
usage = r.get("usage", {}) or {}
|
usage = r.get("usage", {}) or {}
|
||||||
t = r.get("timings", {}) or {}
|
t = r.get("timings", {}) or {}
|
||||||
|
prompt_n = usage.get("prompt_tokens") or t.get("prompt_n")
|
||||||
|
prompt_ms = t.get("prompt_ms")
|
||||||
|
prompt_per_second = t.get("prompt_per_second")
|
||||||
predicted_n = usage.get("completion_tokens") or t.get("predicted_n")
|
predicted_n = usage.get("completion_tokens") or t.get("predicted_n")
|
||||||
predicted_per_second = t.get("predicted_per_second") or (predicted_n / wall if wall > 0 else 0)
|
predicted_per_second = t.get("predicted_per_second") or (predicted_n / wall if wall > 0 else 0)
|
||||||
rec = {"name": p["name"], "wall_s": round(wall,3),
|
rec = {"name": p["name"], "wall_s": round(wall,3),
|
||||||
|
"prompt_n": prompt_n,
|
||||||
|
"prompt_ms": round(prompt_ms, 2) if prompt_ms is not None else None,
|
||||||
|
"prompt_per_second": round(prompt_per_second, 2) if prompt_per_second is not None else None,
|
||||||
"predicted_n": predicted_n, "predicted_per_second": round(predicted_per_second, 2),
|
"predicted_n": predicted_n, "predicted_per_second": round(predicted_per_second, 2),
|
||||||
"draft_n": t.get("draft_n",0), "draft_n_accepted": t.get("draft_n_accepted",0)}
|
"draft_n": t.get("draft_n",0), "draft_n_accepted": t.get("draft_n_accepted",0)}
|
||||||
rec["accept_rate"] = round(rec["draft_n_accepted"]/rec["draft_n"],4) if rec["draft_n"] else None
|
rec["accept_rate"] = round(rec["draft_n_accepted"]/rec["draft_n"],4) if rec["draft_n"] else None
|
||||||
out["results"].append(rec)
|
out["results"].append(rec)
|
||||||
ar = f"{rec['accept_rate']:.3f}" if rec["accept_rate"] is not None else "n/a"
|
ar = f"{rec['accept_rate']:.3f}" if rec["accept_rate"] is not None else "n/a"
|
||||||
print(f" {rec['name']:<18} pred={rec['predicted_n']:>4} draft={rec['draft_n']:>4} acc={rec['draft_n_accepted']:>4} rate={ar} tok/s={rec['predicted_per_second']:.1f}")
|
pps = f" pt/s={rec['prompt_per_second']:.1f}" if rec.get("prompt_per_second") else ""
|
||||||
|
print(f" {rec['name']:<18} pred={rec['predicted_n']:>4} draft={rec['draft_n']:>4} acc={rec['draft_n_accepted']:>4} rate={ar} tok/s={rec['predicted_per_second']:.1f}{pps}")
|
||||||
td = sum(x["draft_n"] or 0 for x in out["results"])
|
td = sum(x["draft_n"] or 0 for x in out["results"])
|
||||||
ta = sum(x["draft_n_accepted"] or 0 for x in out["results"])
|
ta = sum(x["draft_n_accepted"] or 0 for x in out["results"])
|
||||||
tp = sum(x["predicted_n"] or 0 for x in out["results"])
|
tp = sum(x["predicted_n"] or 0 for x in out["results"])
|
||||||
|
t_pn = sum(x["prompt_n"] or 0 for x in out["results"])
|
||||||
tw = sum(x["wall_s"] for x in out["results"])
|
tw = sum(x["wall_s"] for x in out["results"])
|
||||||
|
pps_list = [x["prompt_per_second"] for x in out["results"] if x.get("prompt_per_second") is not None]
|
||||||
|
avg_pps = sum(pps_list)/len(pps_list) if pps_list else None
|
||||||
|
|
||||||
out["aggregate"] = {"n_requests": len(out["results"]), "total_predicted": tp, "total_draft": td, "total_draft_accepted": ta,
|
out["aggregate"] = {"n_requests": len(out["results"]), "total_predicted": tp, "total_draft": td, "total_draft_accepted": ta,
|
||||||
"aggregate_accept_rate": round(ta/td,4) if td else None, "wall_s_total": round(tw,2)}
|
"aggregate_accept_rate": round(ta/td,4) if td else None, "wall_s_total": round(tw,2),
|
||||||
|
"total_prompt_tokens": t_pn, "avg_prompt_per_second": round(avg_pps, 2) if avg_pps is not None else None}
|
||||||
print("\nAggregate:", json.dumps(out["aggregate"], indent=2))
|
print("\nAggregate:", json.dumps(out["aggregate"], indent=2))
|
||||||
if args.out:
|
if args.out:
|
||||||
json.dump(out, open(args.out,"w"), indent=2); print("Wrote", args.out)
|
json.dump(out, open(args.out,"w"), indent=2); print("Wrote", args.out)
|
||||||
|
|||||||
@@ -29,8 +29,8 @@ from urllib.error import URLError
|
|||||||
# ── Toolbox definitions ──────────────────────────────────────────────────────
|
# ── Toolbox definitions ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
TOOLBOXES = {
|
TOOLBOXES = {
|
||||||
"rocm-7.2.3-mtp": {
|
"rocm-7.2.3": {
|
||||||
"image": "docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.2.3-mtp",
|
"image": "docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.2.3",
|
||||||
"engine_args": [
|
"engine_args": [
|
||||||
"--device", "/dev/dri",
|
"--device", "/dev/dri",
|
||||||
"--device", "/dev/kfd",
|
"--device", "/dev/kfd",
|
||||||
@@ -39,8 +39,8 @@ TOOLBOXES = {
|
|||||||
"--security-opt", "seccomp=unconfined",
|
"--security-opt", "seccomp=unconfined",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
"vulkan-radv-mtp": {
|
"vulkan-radv": {
|
||||||
"image": "docker.io/kyuz0/amd-strix-halo-toolboxes:vulkan-radv-mtp",
|
"image": "docker.io/kyuz0/amd-strix-halo-toolboxes:vulkan-radv",
|
||||||
"engine_args": [
|
"engine_args": [
|
||||||
"--device", "/dev/dri",
|
"--device", "/dev/dri",
|
||||||
"--group-add", "video",
|
"--group-add", "video",
|
||||||
@@ -415,9 +415,9 @@ def print_summary(results_dir: Path):
|
|||||||
baselines[key] = r["_avg_toks"]
|
baselines[key] = r["_avg_toks"]
|
||||||
|
|
||||||
# Print table
|
# Print table
|
||||||
print("\n" + "=" * 100)
|
print("\n" + "=" * 115)
|
||||||
print(f"{'Model':<30} {'Toolbox':<20} {'Mode':<10} {'Avg tok/s':>10} {'Accept%':>9} {'Wall(s)':>8} {'Speedup':>8}")
|
print(f"{'Model':<30} {'Toolbox':<20} {'Mode':<10} {'Prefill pt/s':>13} {'Avg tok/s':>10} {'Accept%':>9} {'Wall(s)':>8} {'Speedup':>8}")
|
||||||
print("-" * 100)
|
print("-" * 115)
|
||||||
|
|
||||||
for r in results:
|
for r in results:
|
||||||
agg = r.get("aggregate", {})
|
agg = r.get("aggregate", {})
|
||||||
@@ -426,6 +426,9 @@ def print_summary(results_dir: Path):
|
|||||||
accept_str = f"{accept * 100:.1f}%" if accept is not None else "—"
|
accept_str = f"{accept * 100:.1f}%" if accept is not None else "—"
|
||||||
avg_toks = r["_avg_toks"]
|
avg_toks = r["_avg_toks"]
|
||||||
|
|
||||||
|
avg_prompt = agg.get("avg_prompt_per_second")
|
||||||
|
prefill_str = f"{avg_prompt:.1f}" if avg_prompt is not None else "—"
|
||||||
|
|
||||||
# Speedup relative to baseline
|
# Speedup relative to baseline
|
||||||
baseline_key = (r["model"], r["toolbox"])
|
baseline_key = (r["model"], r["toolbox"])
|
||||||
baseline_toks = baselines.get(baseline_key)
|
baseline_toks = baselines.get(baseline_key)
|
||||||
@@ -434,9 +437,9 @@ def print_summary(results_dir: Path):
|
|||||||
else:
|
else:
|
||||||
speedup = "—"
|
speedup = "—"
|
||||||
|
|
||||||
print(f"{r['model']:<30} {r['toolbox']:<20} {r['mode']:<10} {avg_toks:>10.1f} {accept_str:>9} {wall:>8.1f} {speedup:>8}")
|
print(f"{r['model']:<30} {r['toolbox']:<20} {r['mode']:<10} {prefill_str:>13} {avg_toks:>10.1f} {accept_str:>9} {wall:>8.1f} {speedup:>8}")
|
||||||
|
|
||||||
print("=" * 100)
|
print("=" * 115)
|
||||||
|
|
||||||
# Write summary.json
|
# Write summary.json
|
||||||
summary_data = []
|
summary_data = []
|
||||||
@@ -446,9 +449,11 @@ def print_summary(results_dir: Path):
|
|||||||
"model": r["model"],
|
"model": r["model"],
|
||||||
"toolbox": r["toolbox"],
|
"toolbox": r["toolbox"],
|
||||||
"mode": r["mode"],
|
"mode": r["mode"],
|
||||||
|
"avg_prompt_tok_s": agg.get("avg_prompt_per_second"),
|
||||||
"avg_tok_s": round(r["_avg_toks"], 1),
|
"avg_tok_s": round(r["_avg_toks"], 1),
|
||||||
"accept_rate": agg.get("aggregate_accept_rate"),
|
"accept_rate": agg.get("aggregate_accept_rate"),
|
||||||
"wall_s_total": agg.get("wall_s_total"),
|
"wall_s_total": agg.get("wall_s_total"),
|
||||||
|
"results": r.get("results", [])
|
||||||
})
|
})
|
||||||
|
|
||||||
summary_path = results_dir / "summary.json"
|
summary_path = results_dir / "summary.json"
|
||||||
|
|||||||
@@ -103,9 +103,90 @@
|
|||||||
font-size: 11px;
|
font-size: 11px;
|
||||||
background: #f1f5ff;
|
background: #f1f5ff;
|
||||||
color: #1d4ed8;
|
color: #1d4ed8;
|
||||||
|
white-space: nowrap;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toolbox-pill.radv {
|
.toolbox-pill.radv {
|
||||||
background: #fdf2f8;
|
background: #fdf2f8;
|
||||||
color: #9d174d;
|
color: #9d174d;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Expandable row interactivity */
|
||||||
|
.mtp-table tbody tr.main-row {
|
||||||
|
cursor: pointer;
|
||||||
|
transition: background-color 0.15s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.mtp-table tbody tr.main-row:hover {
|
||||||
|
background-color: var(--hover);
|
||||||
|
}
|
||||||
|
|
||||||
|
.mtp-table tbody tr.main-row td:first-child::before {
|
||||||
|
content: "▶";
|
||||||
|
display: inline-block;
|
||||||
|
font-size: 10px;
|
||||||
|
margin-right: 8px;
|
||||||
|
color: var(--muted);
|
||||||
|
transition: transform 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.mtp-table tbody tr.main-row.expanded td:first-child::before {
|
||||||
|
transform: rotate(90deg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Details row and sub-table */
|
||||||
|
.details-row {
|
||||||
|
background-color: #f8fafc;
|
||||||
|
}
|
||||||
|
|
||||||
|
.details-row.hidden {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.details-row td {
|
||||||
|
padding: 0;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
|
||||||
|
.granular-wrap {
|
||||||
|
padding: 16px 24px;
|
||||||
|
box-shadow: inset 0 2px 4px rgba(0,0,0,0.02);
|
||||||
|
}
|
||||||
|
|
||||||
|
.granular-table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
font-size: 13px;
|
||||||
|
background: #fff;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 4px;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.granular-table th, .granular-table td {
|
||||||
|
padding: 8px 12px;
|
||||||
|
text-align: left;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
|
||||||
|
.granular-table th {
|
||||||
|
background: #f1f5f9;
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--ink);
|
||||||
|
text-transform: uppercase;
|
||||||
|
font-size: 11px;
|
||||||
|
letter-spacing: 0.5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.granular-table td.num {
|
||||||
|
text-align: right;
|
||||||
|
font-variant-numeric: tabular-nums;
|
||||||
|
}
|
||||||
|
|
||||||
|
.granular-table tr:last-child td {
|
||||||
|
border-bottom: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.granular-table tbody tr:hover {
|
||||||
|
background-color: var(--hover);
|
||||||
|
}
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ function renderTable(runs, tbody) {
|
|||||||
|
|
||||||
rows.forEach(row => {
|
rows.forEach(row => {
|
||||||
const tr = document.createElement("tr");
|
const tr = document.createElement("tr");
|
||||||
|
tr.className = "main-row";
|
||||||
|
|
||||||
// Model
|
// Model
|
||||||
const tdModel = document.createElement("td");
|
const tdModel = document.createElement("td");
|
||||||
@@ -87,7 +88,22 @@ function renderTable(runs, tbody) {
|
|||||||
tr.appendChild(makeMetricCell(mtp3Speed));
|
tr.appendChild(makeMetricCell(mtp3Speed));
|
||||||
tr.appendChild(makeSpeedupCell(baseSpeed, mtp3Speed));
|
tr.appendChild(makeSpeedupCell(baseSpeed, mtp3Speed));
|
||||||
|
|
||||||
|
// Details row
|
||||||
|
const detailsTr = document.createElement("tr");
|
||||||
|
detailsTr.className = "details-row hidden";
|
||||||
|
const detailsTd = document.createElement("td");
|
||||||
|
detailsTd.colSpan = 8;
|
||||||
|
|
||||||
|
detailsTd.innerHTML = makeDetailsHTML(row);
|
||||||
|
detailsTr.appendChild(detailsTd);
|
||||||
|
|
||||||
|
tr.addEventListener("click", () => {
|
||||||
|
tr.classList.toggle("expanded");
|
||||||
|
detailsTr.classList.toggle("hidden");
|
||||||
|
});
|
||||||
|
|
||||||
tbody.appendChild(tr);
|
tbody.appendChild(tr);
|
||||||
|
tbody.appendChild(detailsTr);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -126,3 +142,99 @@ function makeSpeedupCell(base, mtp) {
|
|||||||
}
|
}
|
||||||
return td;
|
return td;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the HTML for the expandable "granular details" panel of one summary row.
 *
 * The returned markup is assigned to `innerHTML` by the caller, so every value
 * that originates from the benchmark data (the task name) is HTML-escaped here.
 *
 * @param {Object} row - One grouped summary row. Reads `row.baseline`,
 *   `row.mtp2` and `row.mtp3`; each may be missing, and each may carry a
 *   `results` array of per-prompt records with `name`, `prompt_per_second`,
 *   `predicted_per_second` and `accept_rate`.
 * @returns {string} HTML for a `.granular-wrap` container: either a per-task
 *   comparison table, or a short notice when the baseline has no per-prompt
 *   results.
 */
function makeDetailsHTML(row) {
  if (!row.baseline || !row.baseline.results || row.baseline.results.length === 0) {
    return `<div class="granular-wrap"><p style="font-size: 13px; color: var(--muted); margin: 0;">Granular data not available for this run. Re-run benchmarks to capture prompt-level metrics.</p></div>`;
  }

  const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  // Escape text before it is spliced into markup destined for innerHTML.
  const escapeHTML = value => String(value).replace(/[&<>"']/g, ch => HTML_ESCAPES[ch]);

  // A rate is usable only if it is a finite, non-zero number. Zero is treated
  // as "no data", exactly as the previous truthiness check did; non-numeric
  // values no longer reach .toFixed() and throw.
  const hasRate = value => Number.isFinite(value) && value !== 0;
  const formatRate = value => (hasRate(value) ? value.toFixed(1) : "—");
  const formatPercent = value => (Number.isFinite(value) ? (value * 100).toFixed(1) + "%" : "—");

  // Prefill cell: the rate itself plus, when both sides are known, the
  // relative change against the baseline prefill rate.
  const formatPrefillWithDelta = (baseValue, modeValue) => {
    let cell = formatRate(modeValue);
    if (hasRate(baseValue) && hasRate(modeValue)) {
      const pct = ((modeValue - baseValue) / baseValue) * 100;
      const color = pct >= 0 ? '#16a34a' : '#dc2626';
      const sign = pct > 0 ? '+' : '';
      cell += ` <span style="font-size: 10px; color: ${color}; margin-left: 4px;">${sign}${pct.toFixed(1)}%</span>`;
    }
    return cell;
  };

  // Merge the per-prompt records of all three modes into one entry per task
  // name. A Map keeps tasks in first-seen order.
  const tasks = new Map();
  const modes = [
    { key: "base", data: row.baseline },
    { key: "mtp2", data: row.mtp2 },
    { key: "mtp3", data: row.mtp3 }
  ];

  modes.forEach(mode => {
    if (!mode.data || !mode.data.results) return;
    mode.data.results.forEach(res => {
      if (!tasks.has(res.name)) {
        tasks.set(res.name, { name: res.name });
      }
      const t = tasks.get(res.name);
      t[`${mode.key}_prefill`] = res.prompt_per_second;
      t[`${mode.key}_toks`] = res.predicted_per_second;
      t[`${mode.key}_acc`] = res.accept_rate;
    });
  });

  let html = `
    <div class="granular-wrap">
      <table class="granular-table">
        <thead>
          <tr>
            <th>Prompt Task</th>
            <th class="num" title="Prefill (Prompt Processing) tok/s (Baseline)">Prefill (Base)</th>
            <th class="num" title="Prefill tok/s (MTP-2)">Prefill (MTP-2)</th>
            <th class="num" title="Prefill tok/s (MTP-3)">Prefill (MTP-3)</th>
            <th class="num" title="Baseline Gen tok/s">Base Gen</th>
            <th class="num" title="MTP-2 Gen tok/s">MTP-2 Gen</th>
            <th class="num" title="MTP-2 Accept Rate">Acc%</th>
            <th class="num" title="MTP-3 Gen tok/s">MTP-3 Gen</th>
            <th class="num" title="MTP-3 Accept Rate">Acc%</th>
          </tr>
        </thead>
        <tbody>
  `;

  tasks.forEach(t => {
    const p_base = formatRate(t.base_prefill);
    const p_mtp2 = formatPrefillWithDelta(t.base_prefill, t.mtp2_prefill);
    const p_mtp3 = formatPrefillWithDelta(t.base_prefill, t.mtp3_prefill);

    const g_base = formatRate(t.base_toks);
    const g_mtp2 = formatRate(t.mtp2_toks);
    const a_mtp2 = formatPercent(t.mtp2_acc);
    const g_mtp3 = formatRate(t.mtp3_toks);
    const a_mtp3 = formatPercent(t.mtp3_acc);

    html += `
          <tr>
            <td>${escapeHTML(t.name)}</td>
            <td class="num">${p_base}</td>
            <td class="num">${p_mtp2}</td>
            <td class="num">${p_mtp3}</td>
            <td class="num">${g_base}</td>
            <td class="num">${g_mtp2}</td>
            <td class="num" style="color: var(--muted);">${a_mtp2}</td>
            <td class="num">${g_mtp3}</td>
            <td class="num" style="color: var(--muted);">${a_mtp3}</td>
          </tr>
    `;
  });

  html += `
        </tbody>
      </table>
    </div>`;

  return html;
}
|
||||||
|
|||||||
+1366
-34
File diff suppressed because it is too large
Load Diff
@@ -1,113 +0,0 @@
|
|||||||
# build stage
|
|
||||||
FROM registry.fedoraproject.org/fedora:43 AS builder
|
|
||||||
|
|
||||||
# rocm 7.2.3 repo
|
|
||||||
RUN <<'EOF'
|
|
||||||
tee /etc/yum.repos.d/rocm.repo <<REPO
|
|
||||||
[ROCm-7.2.3]
|
|
||||||
name=ROCm7.2.3
|
|
||||||
baseurl=https://repo.radeon.com/rocm/rhel10/7.2.3/main
|
|
||||||
enabled=1
|
|
||||||
priority=50
|
|
||||||
gpgcheck=1
|
|
||||||
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
|
|
||||||
REPO
|
|
||||||
EOF
|
|
||||||
|
|
||||||
# deps
|
|
||||||
RUN dnf -y --nodocs --setopt=install_weak_deps=False \
|
|
||||||
--exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \
|
|
||||||
install \
|
|
||||||
make gcc cmake lld clang clang-devel compiler-rt libcurl-devel ninja-build \
|
|
||||||
rocm-llvm rocm-device-libs hip-runtime-amd hip-devel \
|
|
||||||
rocblas rocblas-devel hipblas hipblas-devel rocm-cmake libomp-devel libomp \
|
|
||||||
rocminfo radeontop \
|
|
||||||
git-core vim sudo rsync patch \
|
|
||||||
&& dnf clean all && rm -rf /var/cache/dnf/*
|
|
||||||
|
|
||||||
# rocm env
|
|
||||||
ENV ROCM_PATH=/opt/rocm \
|
|
||||||
HIP_PATH=/opt/rocm \
|
|
||||||
HIP_CLANG_PATH=/opt/rocm/llvm/bin \
|
|
||||||
HIP_DEVICE_LIB_PATH=/opt/rocm/amdgcn/bitcode \
|
|
||||||
PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH
|
|
||||||
|
|
||||||
# llama.cpp (am17an mtp-clean fork — Multi-Token Prediction)
|
|
||||||
WORKDIR /opt/llama.cpp
|
|
||||||
RUN git clone -b mtp-clean --single-branch https://github.com/am17an/llama.cpp.git .
|
|
||||||
|
|
||||||
COPY llama-grammar.patch /tmp/llama-grammar.patch
|
|
||||||
|
|
||||||
# build
|
|
||||||
RUN git clean -xdf \
|
|
||||||
&& git submodule update --recursive \
|
|
||||||
&& patch -p1 < /tmp/llama-grammar.patch \
|
|
||||||
&& cmake -S . -B build \
|
|
||||||
-DGGML_HIP=ON \
|
|
||||||
-DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm -mllvm --amdgpu-unroll-threshold-local=600" \
|
|
||||||
-DAMDGPU_TARGETS=gfx1151 \
|
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
|
||||||
-DGGML_RPC=ON \
|
|
||||||
-DLLAMA_HIP_UMA=ON \
|
|
||||||
-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=ON \
|
|
||||||
-DROCM_PATH=/opt/rocm \
|
|
||||||
-DHIP_PATH=/opt/rocm \
|
|
||||||
-DHIP_PLATFORM=amd \
|
|
||||||
&& cmake --build build --config Release -- -j$(nproc) \
|
|
||||||
&& cmake --install build --config Release
|
|
||||||
|
|
||||||
# libs
|
|
||||||
RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \
|
|
||||||
&& ldconfig
|
|
||||||
|
|
||||||
# helper
|
|
||||||
COPY gguf-vram-estimator.py /usr/local/bin/gguf-vram-estimator.py
|
|
||||||
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
|
|
||||||
|
|
||||||
# runtime stage
|
|
||||||
FROM registry.fedoraproject.org/fedora-minimal:43
|
|
||||||
|
|
||||||
# rocm 7.2.3 repo
|
|
||||||
RUN <<'EOF'
|
|
||||||
tee /etc/yum.repos.d/rocm.repo <<REPO
|
|
||||||
[ROCm-7.2.3]
|
|
||||||
name=ROCm7.2.3
|
|
||||||
baseurl=https://repo.radeon.com/rocm/rhel10/7.2.3/main
|
|
||||||
enabled=1
|
|
||||||
priority=50
|
|
||||||
gpgcheck=1
|
|
||||||
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
|
|
||||||
REPO
|
|
||||||
EOF
|
|
||||||
|
|
||||||
# runtime deps
|
|
||||||
RUN microdnf -y --nodocs --setopt=install_weak_deps=0 \
|
|
||||||
--exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \
|
|
||||||
install \
|
|
||||||
bash ca-certificates libatomic libstdc++ libgcc libgomp sudo \
|
|
||||||
hip-runtime-amd rocblas hipblas \
|
|
||||||
rocminfo radeontop procps-ng \
|
|
||||||
&& microdnf clean all && rm -rf /var/cache/dnf/*
|
|
||||||
|
|
||||||
# copy
|
|
||||||
COPY --from=builder /usr/local/ /usr/local/
|
|
||||||
COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/
|
|
||||||
|
|
||||||
# ld
|
|
||||||
RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \
|
|
||||||
&& echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \
|
|
||||||
&& ldconfig \
|
|
||||||
&& cp -n /usr/local/lib/libllama*.so* /usr/lib64/ 2>/dev/null || true \
|
|
||||||
&& ldconfig
|
|
||||||
|
|
||||||
# helper
|
|
||||||
COPY gguf-vram-estimator.py /usr/local/bin/gguf-vram-estimator.py
|
|
||||||
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
|
|
||||||
|
|
||||||
# profile
|
|
||||||
RUN printf '%s\n' \
|
|
||||||
> /etc/profile.d/rocm.sh && chmod +x /etc/profile.d/rocm.sh \
|
|
||||||
&& echo 'source /etc/profile.d/rocm.sh' >> /etc/bashrc
|
|
||||||
|
|
||||||
# shell
|
|
||||||
CMD ["/bin/bash"]
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
# build stage
|
|
||||||
FROM registry.fedoraproject.org/fedora:43 AS builder
|
|
||||||
|
|
||||||
# deps
|
|
||||||
RUN dnf -y --nodocs --setopt=install_weak_deps=False install \
|
|
||||||
git vim \
|
|
||||||
make gcc cmake ninja-build lld clang clang-devel compiler-rt libcurl-devel \
|
|
||||||
vulkan-loader-devel vulkaninfo mesa-vulkan-drivers \
|
|
||||||
spirv-headers-devel radeontop glslc patch \
|
|
||||||
&& dnf clean all && rm -rf /var/cache/dnf/*
|
|
||||||
|
|
||||||
# llama.cpp (am17an mtp-clean fork — Multi-Token Prediction)
|
|
||||||
WORKDIR /opt/llama.cpp
|
|
||||||
RUN git clone -b mtp-clean --single-branch https://github.com/am17an/llama.cpp.git .
|
|
||||||
|
|
||||||
COPY llama-grammar.patch /tmp/llama-grammar.patch
|
|
||||||
|
|
||||||
# build
|
|
||||||
RUN git clean -xdf \
|
|
||||||
&& git submodule update --recursive \
|
|
||||||
&& patch -p1 < /tmp/llama-grammar.patch \
|
|
||||||
&& cmake -S . -B build -G Ninja \
|
|
||||||
-DGGML_VULKAN=ON \
|
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
|
||||||
-DGGML_RPC=ON \
|
|
||||||
-DCMAKE_INSTALL_PREFIX=/usr \
|
|
||||||
-DLLAMA_BUILD_TESTS=OFF \
|
|
||||||
-DLLAMA_BUILD_EXAMPLES=ON \
|
|
||||||
-DLLAMA_BUILD_SERVER=ON \
|
|
||||||
&& cmake --build build --config Release \
|
|
||||||
&& cmake --install build --config Release
|
|
||||||
|
|
||||||
# libs
|
|
||||||
RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \
|
|
||||||
&& ldconfig
|
|
||||||
|
|
||||||
# helper
|
|
||||||
COPY gguf-vram-estimator.py /usr/local/bin/gguf-vram-estimator.py
|
|
||||||
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
|
|
||||||
|
|
||||||
|
|
||||||
# runtime stage
|
|
||||||
FROM registry.fedoraproject.org/fedora-minimal:43
|
|
||||||
|
|
||||||
# runtime deps
|
|
||||||
RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \
|
|
||||||
bash ca-certificates libatomic libstdc++ libgcc \
|
|
||||||
vulkan-loader vulkan-loader-devel vulkaninfo mesa-vulkan-drivers radeontop procps-ng \
|
|
||||||
&& microdnf clean all && rm -rf /var/cache/dnf/*
|
|
||||||
|
|
||||||
# copy
|
|
||||||
COPY --from=builder /usr/ /usr/
|
|
||||||
COPY --from=builder /usr/local/ /usr/local/
|
|
||||||
COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/
|
|
||||||
|
|
||||||
# ld
|
|
||||||
RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \
|
|
||||||
&& echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \
|
|
||||||
&& ldconfig \
|
|
||||||
&& cp -n /usr/local/lib/libllama*.so* /usr/lib64/ 2>/dev/null || true \
|
|
||||||
&& ldconfig
|
|
||||||
|
|
||||||
# helper
|
|
||||||
COPY gguf-vram-estimator.py /usr/local/bin/gguf-vram-estimator.py
|
|
||||||
RUN chmod +x /usr/local/bin/gguf-vram-estimator.py
|
|
||||||
|
|
||||||
# shell
|
|
||||||
CMD ["/bin/bash"]
|
|
||||||
Reference in New Issue
Block a user