feat: add interactive granular benchmark details to UI and update MTP summary data format
This commit is contained in:
+14
-2
@@ -87,21 +87,33 @@ def run(args):
|
||||
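# OpenAI-compatible endpoint: token counts come from `usage` (falling back to `timings`); durations, speeds and draft stats come from the top-level `timings` object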
|
||||
usage = r.get("usage", {}) or {}
|
||||
t = r.get("timings", {}) or {}
|
||||
prompt_n = usage.get("prompt_tokens") or t.get("prompt_n")
|
||||
prompt_ms = t.get("prompt_ms")
|
||||
prompt_per_second = t.get("prompt_per_second")
|
||||
predicted_n = usage.get("completion_tokens") or t.get("predicted_n")
|
||||
predicted_per_second = t.get("predicted_per_second") or (predicted_n / wall if wall > 0 else 0)
|
||||
rec = {"name": p["name"], "wall_s": round(wall,3),
|
||||
"prompt_n": prompt_n,
|
||||
"prompt_ms": round(prompt_ms, 2) if prompt_ms is not None else None,
|
||||
"prompt_per_second": round(prompt_per_second, 2) if prompt_per_second is not None else None,
|
||||
"predicted_n": predicted_n, "predicted_per_second": round(predicted_per_second, 2),
|
||||
"draft_n": t.get("draft_n",0), "draft_n_accepted": t.get("draft_n_accepted",0)}
|
||||
rec["accept_rate"] = round(rec["draft_n_accepted"]/rec["draft_n"],4) if rec["draft_n"] else None
|
||||
out["results"].append(rec)
|
||||
ar = f"{rec['accept_rate']:.3f}" if rec["accept_rate"] is not None else "n/a"
|
||||
print(f" {rec['name']:<18} pred={rec['predicted_n']:>4} draft={rec['draft_n']:>4} acc={rec['draft_n_accepted']:>4} rate={ar} tok/s={rec['predicted_per_second']:.1f}")
|
||||
pps = f" pt/s={rec['prompt_per_second']:.1f}" if rec.get("prompt_per_second") else ""
|
||||
print(f" {rec['name']:<18} pred={rec['predicted_n']:>4} draft={rec['draft_n']:>4} acc={rec['draft_n_accepted']:>4} rate={ar} tok/s={rec['predicted_per_second']:.1f}{pps}")
|
||||
td = sum(x["draft_n"] or 0 for x in out["results"])
|
||||
ta = sum(x["draft_n_accepted"] or 0 for x in out["results"])
|
||||
tp = sum(x["predicted_n"] or 0 for x in out["results"])
|
||||
t_pn = sum(x["prompt_n"] or 0 for x in out["results"])
|
||||
tw = sum(x["wall_s"] for x in out["results"])
|
||||
pps_list = [x["prompt_per_second"] for x in out["results"] if x.get("prompt_per_second") is not None]
|
||||
avg_pps = sum(pps_list)/len(pps_list) if pps_list else None
|
||||
|
||||
out["aggregate"] = {"n_requests": len(out["results"]), "total_predicted": tp, "total_draft": td, "total_draft_accepted": ta,
|
||||
"aggregate_accept_rate": round(ta/td,4) if td else None, "wall_s_total": round(tw,2)}
|
||||
"aggregate_accept_rate": round(ta/td,4) if td else None, "wall_s_total": round(tw,2),
|
||||
"total_prompt_tokens": t_pn, "avg_prompt_per_second": round(avg_pps, 2) if avg_pps is not None else None}
|
||||
print("\nAggregate:", json.dumps(out["aggregate"], indent=2))
|
||||
if args.out:
|
||||
json.dump(out, open(args.out,"w"), indent=2); print("Wrote", args.out)
|
||||
|
||||
@@ -29,8 +29,8 @@ from urllib.error import URLError
|
||||
# ── Toolbox definitions ──────────────────────────────────────────────────────
|
||||
|
||||
TOOLBOXES = {
|
||||
"rocm-7.2.3-mtp": {
|
||||
"image": "docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.2.3-mtp",
|
||||
"rocm-7.2.3": {
|
||||
"image": "docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.2.3",
|
||||
"engine_args": [
|
||||
"--device", "/dev/dri",
|
||||
"--device", "/dev/kfd",
|
||||
@@ -39,8 +39,8 @@ TOOLBOXES = {
|
||||
"--security-opt", "seccomp=unconfined",
|
||||
],
|
||||
},
|
||||
"vulkan-radv-mtp": {
|
||||
"image": "docker.io/kyuz0/amd-strix-halo-toolboxes:vulkan-radv-mtp",
|
||||
"vulkan-radv": {
|
||||
"image": "docker.io/kyuz0/amd-strix-halo-toolboxes:vulkan-radv",
|
||||
"engine_args": [
|
||||
"--device", "/dev/dri",
|
||||
"--group-add", "video",
|
||||
@@ -415,9 +415,9 @@ def print_summary(results_dir: Path):
|
||||
baselines[key] = r["_avg_toks"]
|
||||
|
||||
# Print table
|
||||
print("\n" + "=" * 100)
|
||||
print(f"{'Model':<30} {'Toolbox':<20} {'Mode':<10} {'Avg tok/s':>10} {'Accept%':>9} {'Wall(s)':>8} {'Speedup':>8}")
|
||||
print("-" * 100)
|
||||
print("\n" + "=" * 115)
|
||||
print(f"{'Model':<30} {'Toolbox':<20} {'Mode':<10} {'Prefill pt/s':>13} {'Avg tok/s':>10} {'Accept%':>9} {'Wall(s)':>8} {'Speedup':>8}")
|
||||
print("-" * 115)
|
||||
|
||||
for r in results:
|
||||
agg = r.get("aggregate", {})
|
||||
@@ -426,6 +426,9 @@ def print_summary(results_dir: Path):
|
||||
accept_str = f"{accept * 100:.1f}%" if accept is not None else "—"
|
||||
avg_toks = r["_avg_toks"]
|
||||
|
||||
avg_prompt = agg.get("avg_prompt_per_second")
|
||||
prefill_str = f"{avg_prompt:.1f}" if avg_prompt is not None else "—"
|
||||
|
||||
# Speedup relative to baseline
|
||||
baseline_key = (r["model"], r["toolbox"])
|
||||
baseline_toks = baselines.get(baseline_key)
|
||||
@@ -434,9 +437,9 @@ def print_summary(results_dir: Path):
|
||||
else:
|
||||
speedup = "—"
|
||||
|
||||
print(f"{r['model']:<30} {r['toolbox']:<20} {r['mode']:<10} {avg_toks:>10.1f} {accept_str:>9} {wall:>8.1f} {speedup:>8}")
|
||||
print(f"{r['model']:<30} {r['toolbox']:<20} {r['mode']:<10} {prefill_str:>13} {avg_toks:>10.1f} {accept_str:>9} {wall:>8.1f} {speedup:>8}")
|
||||
|
||||
print("=" * 100)
|
||||
print("=" * 115)
|
||||
|
||||
# Write summary.json
|
||||
summary_data = []
|
||||
@@ -446,9 +449,11 @@ def print_summary(results_dir: Path):
|
||||
"model": r["model"],
|
||||
"toolbox": r["toolbox"],
|
||||
"mode": r["mode"],
|
||||
"avg_prompt_tok_s": agg.get("avg_prompt_per_second"),
|
||||
"avg_tok_s": round(r["_avg_toks"], 1),
|
||||
"accept_rate": agg.get("aggregate_accept_rate"),
|
||||
"wall_s_total": agg.get("wall_s_total"),
|
||||
"results": r.get("results", [])
|
||||
})
|
||||
|
||||
summary_path = results_dir / "summary.json"
|
||||
|
||||
Reference in New Issue
Block a user