Updated key benchmark findings
This commit is contained in:
@@ -161,16 +161,16 @@ PP = prompt processing (tokens/sec prefill), TG = token generation (tokens/sec i
|
|||||||
|
|
||||||
| Model | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 6.4.2 + ROCWMMA | ROCm 7.0 Beta | ROCm 7.0 RC | 🏆 Best PP | 🏆 Best TG |
|
| Model | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 6.4.2 + ROCWMMA | ROCm 7.0 Beta | ROCm 7.0 RC | 🏆 Best PP | 🏆 Best TG |
|
||||||
|---|---|---|---|---|---|---|---|---|
|
|---|---|---|---|---|---|---|---|---|
|
||||||
| **Gemma3 12B Q8_0** | 677 pp / 14.0 tg | 503 pp / 13.8 tg | 223 pp / 13.8 tg | 223 pp / 13.9 tg | 223 pp / 13.9 tg | 222 pp / 13.9 tg | 🏆 **AMDVLK** | — |
|
| **Gemma3 12B Q8_0** | 677 pp / 14.0 tg | 503 pp / 13.8 tg | 223 pp / 13.8 tg | 230 pp / 13.9 tg | 223 pp / 13.9 tg | 222 pp / 13.9 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** |
|
||||||
| **Gemma3 27B BF16** | — | 136 pp / 4.0 tg | 84 pp / 4.0 tg | 93 pp / 4.0 tg | 92 pp / 4.0 tg | 56 pp / 3.1 tg | 🏆 **RADV** | — |
|
| **Gemma3 27B BF16** | ⚠️ Load Error | 139 pp / 4.0 tg | 84 pp / 4.0 tg | 95 pp / 4.0 tg | 92 pp / 4.0 tg | 83 pp / 4.0 tg | 🏆 **RADV** | 🏆 **ROCm6.4.2+ROCWMMA** |
|
||||||
| **Llama-4-Scout 17B Q8_0** | 258 pp / 12.2 tg | 169 pp / 12.3 tg | 135 pp / 11.6 tg | — | — | — | 🏆 **AMDVLK** | — |
|
| **Llama-4-Scout 17B Q8_0** | 260 pp / 12.2 tg | 172 pp / 12.3 tg | 135 pp / 11.6 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** |
|
||||||
| **Llama-4-Scout 17B Q4_K XL** | 218 pp / 20.0 tg | 152 pp / 20.0 tg | 138 pp / 17.4 tg | — | 139 pp / 17.6 tg | 124 pp / 17.6 tg | 🏆 **AMDVLK** | — |
|
| **Llama-4-Scout 17B Q4_K XL** | 221 pp / 20.0 tg | 155 pp / 20.0 tg | 138 pp / 17.4 tg | ⚠️ GPU Hang | 139 pp / 17.6 tg | 124 pp / 17.6 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** |
|
||||||
| **Qwen3 30B BF16** | 107 pp / 8.0 tg | 86 pp / 7.4 tg | 158 pp / 23.9 tg | 158 pp / 24.5 tg | 153 pp / 24.5 tg | 152 pp / 24.6 tg | 🏆 **ROCm6.4.2+ROCWMMA** | — |
|
| **Qwen3 30B BF16** | 108 pp / 8.0 tg | 87 pp / 7.4 tg | 158 pp / 24.3 tg | 162 pp / 24.5 tg | 153 pp / 24.5 tg | 152 pp / 24.6 tg | 🏆 **ROCm6.4.2+ROCWMMA** | 🏆 **ROCm7 RC** |
|
||||||
| **Qwen3-235B Q3_K XL** | 114 pp / 16.0 tg | 65 pp / 16.6 tg | 74 pp / 13.7 tg | — | — | — | 🏆 **AMDVLK** | — |
|
| **Qwen3-235B Q3_K XL** | 116 pp / 16.0 tg | 67 pp / 16.8 tg | 74 pp / 13.7 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** |
|
||||||
| **GLM-4.5-Air-Q4_K_XL** | 201 pp / 22.8 tg | 128 pp / 22.9 tg | 130 pp / 19.4 tg | — | — | 130 pp / 19.8 tg | 🏆 **AMDVLK** | — |
|
| **GLM-4.5-Air-Q4_K_XL** | 202 pp / 22.8 tg | 133 pp / 23.3 tg | 130 pp / 19.4 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | 130 pp / 20.1 tg | 🏆 **AMDVLK** | 🏆 **RADV** |
|
||||||
| **GLM-4.5-Air-Q6_K_XL** | 223 pp / 16.5 tg | 127 pp / 16.8 tg | 125 pp / 15.3 tg | 114 pp / 15.5 tg | 121 pp / 15.5 tg | 124 pp / 15.5 tg | 🏆 **AMDVLK** | — |
|
| **GLM-4.5-Air-Q6_K_XL** | 225 pp / 16.5 tg | 132 pp / 17.0 tg | 125 pp / 15.3 tg | 114 pp / 15.5 tg | 121 pp / 15.5 tg | 124 pp / 15.5 tg | 🏆 **AMDVLK** | 🏆 **RADV** |
|
||||||
| **gpt-oss-120b-mxfp4** | 487 pp / 48.1 tg | 240 pp / 49.0 tg | 353 pp / 44.1 tg | 354 pp / 45.0 tg | 355 pp / 45.0 tg | 353 pp / 45.1 tg | 🏆 **AMDVLK** | — |
|
| **gpt-oss-120b-mxfp4** | 546 pp / 48.1 tg | 255 pp / 49.0 tg | 353 pp / 44.1 tg | 408 pp / 45.0 tg | 355 pp / 45.0 tg | 353 pp / 45.1 tg | 🏆 **AMDVLK** | 🏆 **RADV** |
|
||||||
| **gpt-oss-20b-mxfp4** | 1205 pp / 68.8 tg | 649 pp / 69.9 tg | 583 pp / 64.5 tg | 581 pp / 64.5 tg | 584 pp / 64.4 tg | 582 pp / 64.5 tg | 🏆 **AMDVLK** | — |
|
| **gpt-oss-20b-mxfp4** | 1473 pp / 68.8 tg | 728 pp / 69.9 tg | 583 pp / 64.5 tg | 649 pp / 64.5 tg | 584 pp / 64.4 tg | 582 pp / 64.5 tg | 🏆 **AMDVLK** | 🏆 **RADV** |
|
||||||
|
|
||||||
|
|
||||||
**Observations:**
|
**Observations:**
|
||||||
|
|||||||
@@ -1,118 +1,174 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import json
|
import json, re
|
||||||
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# --- Config ---
|
RESULTS_FILE = "../docs/results.json"
|
||||||
RESULTS_JSON = Path("../docs/results.json")
|
|
||||||
|
|
||||||
|
# Column order + labels
|
||||||
ENV_ORDER = [
|
ENV_ORDER = [
|
||||||
"vulkan_amdvlk",
|
"vulkan_amdvlk",
|
||||||
"vulkan_radv",
|
"vulkan_radv",
|
||||||
"rocm6_4_2",
|
"rocm6_4_2",
|
||||||
"rocm6_4_2-rocwmma",
|
"rocm6_4_2-rocwmma",
|
||||||
"rocm7_beta",
|
"rocm7_beta",
|
||||||
"rocm7_rc"
|
"rocm7_rc",
|
||||||
]
|
]
|
||||||
|
|
||||||
COL_NAMES = {
|
COL_NAMES = {
|
||||||
"vulkan_amdvlk": "Vulkan (AMDVLK)",
|
"vulkan_amdvlk": "Vulkan (AMDVLK)",
|
||||||
"vulkan_radv": "Vulkan (RADV)",
|
"vulkan_radv": "Vulkan (RADV)",
|
||||||
"rocm6_4_2": "ROCm 6.4.2",
|
"rocm6_4_2": "ROCm 6.4.2",
|
||||||
"rocm6_4_2-rocwmma": "ROCm 6.4.2 + ROCWMMA",
|
"rocm6_4_2-rocwmma": "ROCm 6.4.2 + ROCWMMA",
|
||||||
"rocm7_beta": "ROCm 7.0 Beta",
|
"rocm7_beta": "ROCm 7.0 Beta",
|
||||||
"rocm7_rc": "ROCm 7.0 RC"
|
"rocm7_rc": "ROCm 7.0 RC",
|
||||||
}
|
}
|
||||||
|
WINNER_NAMES = {
|
||||||
WINNER_LABELS = {
|
|
||||||
"vulkan_amdvlk": "AMDVLK",
|
"vulkan_amdvlk": "AMDVLK",
|
||||||
"vulkan_radv": "RADV",
|
"vulkan_radv": "RADV",
|
||||||
"rocm6_4_2": "ROCm6.4.2",
|
"rocm6_4_2": "ROCm6.4.2",
|
||||||
"rocm6_4_2-rocwmma": "ROCm6.4.2+ROCWMMA",
|
"rocm6_4_2-rocwmma": "ROCm6.4.2+ROCWMMA",
|
||||||
"rocm7_beta": "ROCm7 Beta",
|
"rocm7_beta": "ROCm7 Beta",
|
||||||
"rocm7_rc": "ROCm7 RC"
|
"rocm7_rc": "ROCm7 RC",
|
||||||
|
}
|
||||||
|
ERROR_LABEL = {
|
||||||
|
"load": "⚠️ Load Error",
|
||||||
|
"hang": "⚠️ GPU Hang",
|
||||||
|
"runtime": "⚠️ Runtime Error",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Display name → fuzzy key (case/UD/shard-insensitive)
|
||||||
DEFAULT_MODELS = [
|
DEFAULT_MODELS = [
|
||||||
("Gemma3 12B Q8_0", "gemma-3-12b-it-UD-Q8_K_XL"),
|
("Gemma3 12B Q8_0", "gemma-3-12b"),
|
||||||
("Gemma3 27B BF16", "gemma-3-27b-it-BF16"),
|
("Gemma3 27B BF16", "gemma-3-27b"),
|
||||||
("Llama-4-Scout 17B Q8_0", "Llama-4-Scout-17B-16E-Instruct-Q8_0"),
|
("Llama-4-Scout 17B Q8_0", "llama-4-scout-17b-16e-instruct-q8_0"),
|
||||||
("Llama-4-Scout 17B Q4_K XL", "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL"),
|
("Llama-4-Scout 17B Q4_K XL", "llama-4-scout-17b-16e-instruct-q4_k_xl"),
|
||||||
("Qwen3 30B BF16", "Qwen3-30B-A3B-BF16"),
|
("Qwen3 30B BF16", "qwen3-30b-a3b-bf16"),
|
||||||
("Qwen3-235B Q3_K XL", "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL"),
|
("Qwen3-235B Q3_K XL", "qwen3-235b-a22b"),
|
||||||
("GLM-4.5-Air-Q4_K_XL", "GLM-4.5-Air-UD-Q4_K_XL"),
|
("GLM-4.5-Air-Q4_K_XL", "glm-4.5-air-q4_k_xl"),
|
||||||
("GLM-4.5-Air-Q6_K_XL", "GLM-4.5-Air-UD-Q6_K_XL"),
|
("GLM-4.5-Air-Q6_K_XL", "glm-4.5-air-q6_k_xl"),
|
||||||
("gpt-oss-120b-mxfp4", "gpt-oss-120b-mxfp4"),
|
("gpt-oss-120b-mxfp4", "gpt-oss-120b-mxfp4"),
|
||||||
("gpt-oss-20b-mxfp4", "gpt-oss-20b-mxfp4"),
|
("gpt-oss-20b-mxfp4", "gpt-oss-20b-mxfp4"),
|
||||||
]
|
]
|
||||||
|
|
||||||
ERROR_LABELS = {
|
SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
|
||||||
"load": "⚠️ Load Error",
|
def norm_model(s: str) -> str:
|
||||||
"hang": "⚠️ GPU Hang",
|
s = (s or "").lower().replace("_", "-")
|
||||||
"runtime": "⚠️ Runtime Error"
|
s = SHARD_RE.sub("", s)
|
||||||
}
|
s = s.replace("-ud", "") # drop -UD tag for matching
|
||||||
|
return s
|
||||||
|
|
||||||
# --- Helpers ---
|
# Load JSON
|
||||||
def load_results():
|
raw = json.loads(Path(RESULTS_FILE).read_text(encoding="utf-8"))
|
||||||
data = json.loads(Path(RESULTS_JSON).read_text())
|
runs = raw["runs"]
|
||||||
return data["runs"]
|
|
||||||
|
|
||||||
def filter_runs(runs, model_prefix, env):
|
# Bucket rows by (model_key, env, test)
|
||||||
for r in runs:
|
buckets = defaultdict(list)
|
||||||
if r["model_clean"].startswith(model_prefix) and r["env"] == env:
|
error_only = defaultdict(list) # (model_key, env) -> [error_type,...] for test=None rows
|
||||||
return r
|
all_models = set()
|
||||||
|
|
||||||
|
# Sort every run into per-(model, env, test) buckets. Runs that are not a
# pp512/tg128 measurement are remembered only when they carry an error.
for run in runs:
    run_env = run.get("env")
    if run_env not in ENV_ORDER:
        continue  # not one of the table columns

    model_key = norm_model(run.get("model_clean") or run.get("model") or "")
    all_models.add(model_key)

    bench = run.get("test")  # "pp512", "tg128", or None for pure errors
    if bench in ("pp512", "tg128"):
        buckets[(model_key, run_env, bench)].append(run)
    elif run.get("error"):
        # capture error-only rows so we can show ⚠️ instead of "—"
        error_only[(model_key, run_env)].append(run.get("error_type") or "runtime")
|
||||||
|
|
||||||
|
def pick_best(rows):
    """Return the non-error row with the highest ``tps_mean``.

    Rows flagged with a truthy ``"error"`` are never candidates. If no row
    has a usable numeric ``tps_mean``, the last error row seen is returned
    instead (or ``None`` when there was no error row either). On a tie the
    earliest row wins.
    """
    failed = []
    scored = []
    for row in rows:
        if row.get("error"):
            failed.append(row)
            continue
        tps = row.get("tps_mean")
        # Only numeric throughputs above the -1 floor take part in the ranking.
        if isinstance(tps, (int, float)) and tps > -1:
            scored.append((tps, row))

    # max() keeps the first of several equal maxima, so ties go to the earliest row.
    winner = max(scored, key=lambda pair: pair[0])[1] if scored else None
    last_failure = failed[-1] if failed else None
    return winner or last_failure
|
||||||
|
|
||||||
|
# Pick one row per (model, env, test). chosen[model][env] ends up holding the
# keys "pp512" / "tg128" (the selected row, possibly None) and, for runs that
# only produced an error, "error_only" (an error-type string).
chosen = defaultdict(lambda: defaultdict(dict))
for bucket_key, candidates in buckets.items():
    model_key, env_name, test_name = bucket_key
    chosen[model_key][env_name][test_name] = pick_best(candidates)

for (model_key, env_name), kinds in error_only.items():
    if not kinds:
        continue
    # prefer specific types in a stable order: load, then hang, else runtime
    label = next((kind for kind in ("load", "hang") if kind in kinds), "runtime")
    chosen[model_key][env_name]["error_only"] = label
|
||||||
|
|
||||||
|
def format_cell(entry_dict):
    """Render one table cell from the per-environment entry of a model.

    ``entry_dict`` may hold a ``"pp512"`` row, a ``"tg128"`` row and an
    ``"error_only"`` error-type string. The result is an error label, the
    dash placeholder, or ``"<pp> pp / <tg> tg"`` with a dash standing in for
    whichever half is unavailable.
    """
    pp_row = entry_dict.get("pp512")
    tg_row = entry_dict.get("tg128")

    # If either chosen row is an error, show that error (web UI behavior);
    # the pp row is checked before the tg row.
    failed = next((r for r in (pp_row, tg_row) if r and r.get("error")), None)
    if failed is not None:
        return ERROR_LABEL.get(failed.get("error_type") or "runtime", "⚠️ Error")

    # Nothing measured: fall back to the error-only marker, else truly absent.
    if not (pp_row or tg_row):
        marker = entry_dict.get("error_only")
        return ERROR_LABEL.get(marker, "⚠️ Error") if marker else "—"

    # Otherwise print whatever values exist (partial allowed).
    pp_val = pp_row.get("tps_mean") if pp_row else None
    tg_val = tg_row.get("tps_mean") if tg_row else None
    pp_text = f"{int(round(pp_val))}" if isinstance(pp_val, (int, float)) else "—"
    tg_text = f"{tg_val:.1f}" if isinstance(tg_val, (int, float)) else "—"
    return f"{pp_text} pp / {tg_text} tg"
|
||||||
|
|
||||||
|
def best_env_for(mkey, test):
    """Return the environment with the highest ``tps_mean`` for one test.

    Walks ``ENV_ORDER`` and looks up the chosen row for ``mkey`` / ``test``
    in each environment, ignoring missing rows and error rows. Returns
    ``None`` when no environment has a usable numeric value; on a tie the
    environment listed first in ``ENV_ORDER`` wins.
    """
    leader = None
    top = -1
    for env in ENV_ORDER:
        row = chosen[mkey].get(env, {}).get(test)
        if row and not row.get("error"):
            tps = row.get("tps_mean")
            if isinstance(tps, (int, float)) and tps > top:
                leader = env
                top = tps
    return leader
|
||||||
|
|
||||||
|
# Fuzzy match helper
|
||||||
|
def find_model_key(fuzzy):
    """Resolve a fuzzy model name to one of the normalized keys in ``all_models``.

    The name is normalized with ``norm_model`` and matched as a substring of
    the known keys. Returns the matching key, or ``None`` when nothing
    matches or the normalized name is empty.

    ``all_models`` is a set, and the iteration order of a set of strings can
    change from one interpreter run to the next. Returning "the first hit"
    could therefore select a different model on each run whenever the name
    matches more than one key. The choice is made deterministic here: an
    exact match wins, otherwise the shortest matching key, with ties broken
    alphabetically.
    """
    needle = norm_model(fuzzy)
    if not needle:
        # An empty needle is a substring of every key; treat it as "no match".
        return None
    if needle in all_models:
        return needle
    return min(
        (key for key in all_models if needle in key),
        key=lambda key: (len(key), key),
        default=None,
    )
|
||||||
|
|
||||||
def format_cell(pp_run, tg_run):
|
# Print table
|
||||||
if not pp_run or not tg_run:
|
header = ["Model"] + [COL_NAMES[e] for e in ENV_ORDER] + ["🏆 Best PP", "🏆 Best TG"]
|
||||||
return "—"
|
print("| " + " | ".join(header) + " |")
|
||||||
if pp_run["error"] or tg_run["error"]:
|
print("|" + "|".join(["---"] * len(header)) + "|")
|
||||||
return ERROR_LABELS.get(pp_run["error_type"] or tg_run["error_type"], "⚠️ Error")
|
|
||||||
if pp_run["tps_mean"] is None or tg_run["tps_mean"] is None:
|
|
||||||
return "—"
|
|
||||||
return f"{int(round(pp_run['tps_mean']))} pp / {tg_run['tps_mean']:.1f} tg"
|
|
||||||
|
|
||||||
def find_winner(runs, model_prefix, bench_type):
|
for disp, fuzzy in DEFAULT_MODELS:
|
||||||
vals = {}
|
mkey = find_model_key(fuzzy)
|
||||||
|
if not mkey:
|
||||||
|
print("| " + " | ".join([f"**{disp}**"] + ["—"]*len(ENV_ORDER) + ["—","—"]) + " |")
|
||||||
|
continue
|
||||||
|
row = [f"**{disp}**"]
|
||||||
for env in ENV_ORDER:
|
for env in ENV_ORDER:
|
||||||
r = filter_runs(runs, model_prefix, env)
|
row.append(format_cell(chosen[mkey].get(env, {})))
|
||||||
if r and not r["error"] and r["test"] == bench_type and r["tps_mean"] is not None:
|
bpp = best_env_for(mkey, "pp512")
|
||||||
vals[env] = r["tps_mean"]
|
btg = best_env_for(mkey, "tg128")
|
||||||
if not vals:
|
row.append(f"🏆 **{WINNER_NAMES[bpp]}**" if bpp else "—")
|
||||||
return None
|
row.append(f"🏆 **{WINNER_NAMES[btg]}**" if btg else "—")
|
||||||
return max(vals, key=vals.get)
|
|
||||||
|
|
||||||
# --- Main ---
|
|
||||||
def main():
|
|
||||||
runs = load_results()
|
|
||||||
|
|
||||||
header = ["Model"] + [COL_NAMES[e] for e in ENV_ORDER] + ["🏆 Best PP", "🏆 Best TG"]
|
|
||||||
print("| " + " | ".join(header) + " |")
|
|
||||||
print("|" + "|".join(["---"] * len(header)) + "|")
|
|
||||||
|
|
||||||
for disp_name, model_prefix in DEFAULT_MODELS:
|
|
||||||
row = [f"**{disp_name}**"]
|
|
||||||
for env in ENV_ORDER:
|
|
||||||
pp_run = filter_runs(runs, model_prefix, env)
|
|
||||||
tg_run = filter_runs(runs, model_prefix, env)
|
|
||||||
pp = None
|
|
||||||
tg = None
|
|
||||||
if pp_run and pp_run["test"] == "pp512":
|
|
||||||
pp = pp_run
|
|
||||||
if tg_run and tg_run["test"] == "tg128":
|
|
||||||
tg = tg_run
|
|
||||||
# match pp and tg runs by env
|
|
||||||
pp_env_run = next((r for r in runs if r["model_clean"].startswith(model_prefix) and r["env"] == env and r["test"] == "pp512"), None)
|
|
||||||
tg_env_run = next((r for r in runs if r["model_clean"].startswith(model_prefix) and r["env"] == env and r["test"] == "tg128"), None)
|
|
||||||
row.append(format_cell(pp_env_run, tg_env_run))
|
|
||||||
|
|
||||||
bpp = find_winner(runs, model_prefix, "pp512")
|
|
||||||
btg = find_winner(runs, model_prefix, "tg128")
|
|
||||||
row.append(f"🏆 **{WINNER_LABELS[bpp]}**" if bpp else "—")
|
|
||||||
row.append(f"🏆 **{WINNER_LABELS[btg]}**" if btg else "—")
|
|
||||||
|
|
||||||
print("| " + " | ".join(row) + " |")
|
print("| " + " | ".join(row) + " |")
|
||||||
|
|
||||||
print("\nFull interactive results: [Live Benchmark Viewer](https://your-live-results-url)")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|||||||
Reference in New Issue
Block a user