diff --git a/README.md b/README.md index 897854f..0515886 100644 --- a/README.md +++ b/README.md @@ -159,18 +159,18 @@ Representative LLMs were tested on **AMD Ryzen AI Max “Strix Halo”** across PP = prompt processing (tokens/sec prefill), TG = token generation (tokens/sec interactive). -| Model | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 6.4.2 + ROCWMMA | ROCm 7.0 Beta | ROCm 7.0 RC | 🏆 Best PP | 🏆 Best TG | +| Model | 🏆 Best PP | 🏆 Best TG | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 6.4.2 + ROCWMMA | ROCm 7.0 Beta | ROCm 7.0 RC | |---|---|---|---|---|---|---|---|---| -| **Gemma3 12B Q8_0** | 677 pp / 14.0 tg | 503 pp / 13.8 tg | 223 pp / 13.8 tg | 230 pp / 13.9 tg | 223 pp / 13.9 tg | 222 pp / 13.9 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** | -| **Gemma3 27B BF16** | ⚠️ Load Error | 139 pp / 4.0 tg | 84 pp / 4.0 tg | 95 pp / 4.0 tg | 92 pp / 4.0 tg | 83 pp / 4.0 tg | 🏆 **RADV** | 🏆 **ROCm6.4.2+ROCWMMA** | -| **Llama-4-Scout 17B Q8_0** | 260 pp / 12.2 tg | 172 pp / 12.3 tg | 135 pp / 11.6 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** | -| **Llama-4-Scout 17B Q4_K XL** | 221 pp / 20.0 tg | 155 pp / 20.0 tg | 138 pp / 17.4 tg | ⚠️ GPU Hang | 139 pp / 17.6 tg | 124 pp / 17.6 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** | -| **Qwen3 30B BF16** | 108 pp / 8.0 tg | 87 pp / 7.4 tg | 158 pp / 24.3 tg | 162 pp / 24.5 tg | 153 pp / 24.5 tg | 152 pp / 24.6 tg | 🏆 **ROCm6.4.2+ROCWMMA** | 🏆 **ROCm7 RC** | -| **Qwen3-235B Q3_K XL** | 116 pp / 16.0 tg | 67 pp / 16.8 tg | 74 pp / 13.7 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** | -| **GLM-4.5-Air-Q4_K_XL** | 202 pp / 22.8 tg | 133 pp / 23.3 tg | 130 pp / 19.4 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | 130 pp / 20.1 tg | 🏆 **AMDVLK** | 🏆 **RADV** | -| **GLM-4.5-Air-Q6_K_XL** | 225 pp / 16.5 tg | 132 pp / 17.0 tg | 125 pp / 15.3 tg | 114 pp / 15.5 tg | 121 pp / 15.5 tg | 124 pp / 15.5 tg | 🏆 **AMDVLK** | 🏆 **RADV** | 
-| **gpt-oss-120b-mxfp4** | 546 pp / 48.1 tg | 255 pp / 49.0 tg | 353 pp / 44.1 tg | 408 pp / 45.0 tg | 355 pp / 45.0 tg | 353 pp / 45.1 tg | 🏆 **AMDVLK** | 🏆 **RADV** | -| **gpt-oss-20b-mxfp4** | 1473 pp / 68.8 tg | 728 pp / 69.9 tg | 583 pp / 64.5 tg | 649 pp / 64.5 tg | 584 pp / 64.4 tg | 582 pp / 64.5 tg | 🏆 **AMDVLK** | 🏆 **RADV** | +| **Gemma3 12B Q8_0** | 🏆 **AMDVLK** (FA off) | 🏆 **AMDVLK** (FA off) | 677 pp (FA off) / 14.0 tg (FA off) | 503 pp (FA off) / 13.8 tg (FA off) | 223 pp (FA off) / 13.8 tg (FA off) | 230 pp (FA on) / 13.9 tg (FA off) | 223 pp (FA off) / 13.9 tg (FA off) | 222 pp (FA off) / 13.9 tg (FA off) | +| **Gemma3 27B BF16** | 🏆 **RADV** (FA on) | 🏆 **ROCm6.4.2+ROCWMMA** (FA off) | ⚠️ Load Error | 139 pp (FA on) / 4.0 tg (FA off) | 84 pp (FA on) / 4.0 tg (FA on) | 95 pp (FA on) / 4.0 tg (FA off) | 92 pp (FA off) / 4.0 tg (FA off) | 83 pp (FA on) / 4.0 tg (FA on) | +| **Llama-4-Scout 17B Q8_0** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 260 pp (FA on) / 12.2 tg (FA off) | 172 pp (FA on) / 12.3 tg (FA off) | 135 pp (FA off) / 11.6 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | +| **Llama-4-Scout 17B Q4_K XL** | 🏆 **AMDVLK** (FA on) | 🏆 **AMDVLK** (FA off) | 221 pp (FA on) / 20.0 tg (FA off) | 155 pp (FA on) / 20.0 tg (FA off) | 138 pp (FA off) / 17.4 tg (FA off) | ⚠️ GPU Hang | 139 pp (FA off) / 17.6 tg (FA off) | 124 pp (FA on) / 17.6 tg (FA on) | +| **Qwen3 30B BF16** | 🏆 **ROCm6.4.2+ROCWMMA** (FA on) | 🏆 **ROCm7 RC** (FA off) | 108 pp (FA on) / 8.0 tg (FA off) | 87 pp (FA on) / 7.4 tg (FA on) | 158 pp (FA off) / 24.3 tg (FA on) | 162 pp (FA on) / 24.5 tg (FA off) | 153 pp (FA off) / 24.5 tg (FA off) | 152 pp (FA off) / 24.6 tg (FA off) | +| **Qwen3-235B Q3_K XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 116 pp (FA on) / 16.0 tg (FA off) | 67 pp (FA on) / 16.8 tg (FA on) | 74 pp (FA off) / 13.7 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | +| 
**GLM-4.5-Air-Q4_K_XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 202 pp (FA on) / 22.8 tg (FA on) | 133 pp (FA on) / 23.3 tg (FA on) | 130 pp (FA off) / 19.4 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | 130 pp (FA off) / 20.1 tg (FA on) | +| **GLM-4.5-Air-Q6_K_XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 225 pp (FA on) / 16.5 tg (FA on) | 132 pp (FA on) / 17.0 tg (FA on) | 125 pp (FA off) / 15.3 tg (FA off) | 114 pp (FA off) / 15.5 tg (FA off) | 121 pp (FA off) / 15.5 tg (FA off) | 124 pp (FA off) / 15.5 tg (FA off) | +| **gpt-oss-120b-mxfp4** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 546 pp (FA on) / 48.1 tg (FA off) | 255 pp (FA on) / 49.0 tg (FA off) | 353 pp (FA off) / 44.1 tg (FA off) | 408 pp (FA on) / 45.0 tg (FA off) | 355 pp (FA off) / 45.0 tg (FA off) | 353 pp (FA off) / 45.1 tg (FA off) | +| **gpt-oss-20b-mxfp4** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 1473 pp (FA on) / 68.8 tg (FA off) | 728 pp (FA on) / 69.9 tg (FA off) | 583 pp (FA off) / 64.5 tg (FA off) | 649 pp (FA on) / 64.5 tg (FA off) | 584 pp (FA off) / 64.4 tg (FA off) | 582 pp (FA off) / 64.5 tg (FA off) | **Observations:** diff --git a/benchmark/generate_readme_summary.py b/benchmark/generate_readme_summary.py index 476351c..8a87d7b 100644 --- a/benchmark/generate_readme_summary.py +++ b/benchmark/generate_readme_summary.py @@ -36,7 +36,6 @@ ERROR_LABEL = { "runtime": "⚠️ Runtime Error", } -# Display name → fuzzy key (case/UD/shard-insensitive) DEFAULT_MODELS = [ ("Gemma3 12B Q8_0", "gemma-3-12b"), ("Gemma3 27B BF16", "gemma-3-27b"), @@ -54,16 +53,14 @@ SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE) def norm_model(s: str) -> str: s = (s or "").lower().replace("_", "-") s = SHARD_RE.sub("", s) - s = s.replace("-ud", "") # drop -UD tag for matching + s = s.replace("-ud", "") return s -# Load JSON raw = json.loads(Path(RESULTS_FILE).read_text(encoding="utf-8")) runs = raw["runs"] -# Bucket rows by (model_key, env, test, fa) buckets = 
defaultdict(list) -error_only = defaultdict(list) # (model_key, env) -> [error_type,...] for test=None rows +error_only = defaultdict(list) all_models = set() for r in runs: @@ -72,30 +69,24 @@ for r in runs: continue mkey = norm_model(r.get("model_clean") or r.get("model") or "") all_models.add(mkey) - test = r.get("test") # "pp512", "tg128", or None for pure errors + test = r.get("test") if test in ("pp512", "tg128"): buckets[(mkey, env, test)].append(r) else: - # capture error-only rows so we can show ⚠️ instead of "β€”" if r.get("error"): error_only[(mkey, env)].append(r.get("error_type") or "runtime") def pick_best(rows): - """Choose the best non-error row by tps_mean; if all error, return an error row.""" - best = None - best_val = -1 - fallback = None + best, best_val, fallback = None, -1, None for r in rows: if r.get("error"): fallback = r continue v = r.get("tps_mean") if isinstance(v, (int, float)) and v > best_val: - best_val = v - best = r + best_val, best = v, r return best or fallback -# Build chosen results per (model, env): {pp: row|None, tg: row|None, err_only: str|None} chosen = defaultdict(lambda: defaultdict(dict)) for (mkey, env, test), rows in buckets.items(): chosen_row = pick_best(rows) @@ -103,7 +94,6 @@ for (mkey, env, test), rows in buckets.items(): for (mkey, env), etypes in error_only.items(): if etypes: - # prefer specific types in a stable order if "load" in etypes: chosen[mkey][env]["error_only"] = "load" elif "hang" in etypes: @@ -111,42 +101,55 @@ for (mkey, env), etypes in error_only.items(): else: chosen[mkey][env]["error_only"] = "runtime" +def fa_tag(row): + if not row or row.get("error"): + return "" + fa = row.get("fa") + if fa is None: + return "" + return " (FA on)" if fa else " (FA off)" + def format_cell(entry_dict): pp = entry_dict.get("pp512") tg = entry_dict.get("tg128") - - # If either chosen row is an error, show that error (web UI behavior) for row in (pp, tg): if row and row.get("error"): return 
ERROR_LABEL.get(row.get("error_type") or "runtime", "⚠️ Error") - - # If both pp/tg missing but we have an error-only marker, show it if not pp and not tg: et = entry_dict.get("error_only") if et: return ERROR_LABEL.get(et, "⚠️ Error") - return "β€”" # truly absent - - # Otherwise, print available values (partial allowed) + return "β€”" def fmt(v): return f"{int(round(v))}" if isinstance(v, (int, float)) else "β€”" ppv = pp.get("tps_mean") if pp else None tgv = tg.get("tps_mean") if tg else None - return f"{fmt(ppv)} pp / {tgv:.1f} tg" if isinstance(tgv, (int, float)) \ - else f"{fmt(ppv)} pp / β€” tg" + pp_suffix = fa_tag(pp) + tg_suffix = fa_tag(tg) + if isinstance(tgv, (int, float)): + return f"{fmt(ppv)} pp{pp_suffix} / {tgv:.1f} tg{tg_suffix}" + else: + return f"{fmt(ppv)} pp{pp_suffix} / β€” tg" def best_env_for(mkey, test): - best_env, best_val = None, -1 + best_env, best_val, best_row = None, -1, None for env in ENV_ORDER: row = chosen[mkey].get(env, {}).get(test) if not row or row.get("error"): continue v = row.get("tps_mean") if isinstance(v, (int, float)) and v > best_val: - best_env, best_val = env, v - return best_env + best_env, best_val, best_row = env, v, row + return best_env, (best_row.get("fa") if best_row else None) + +def win_label(env, fa): + if not env: + return "β€”" + base = WINNER_NAMES[env] + if fa is None: + return f"πŸ† **{base}**" + return f"πŸ† **{base}** ({'FA on' if fa else 'FA off'})" -# Fuzzy match helper def find_model_key(fuzzy): needle = norm_model(fuzzy) for k in all_models: @@ -154,21 +157,19 @@ def find_model_key(fuzzy): return k return None -# Print table -header = ["Model"] + [COL_NAMES[e] for e in ENV_ORDER] + ["πŸ† Best PP", "πŸ† Best TG"] +# Header now has Best PP & Best TG right after Model +header = ["Model", "πŸ† Best PP", "πŸ† Best TG"] + [COL_NAMES[e] for e in ENV_ORDER] print("| " + " | ".join(header) + " |") print("|" + "|".join(["---"] * len(header)) + "|") for disp, fuzzy in DEFAULT_MODELS: mkey = 
find_model_key(fuzzy) if not mkey: - print("| " + " | ".join([f"**{disp}**"] + ["β€”"]*len(ENV_ORDER) + ["β€”","β€”"]) + " |") + print("| " + " | ".join([f"**{disp}**", "β€”", "β€”"] + ["β€”"]*len(ENV_ORDER)) + " |") continue - row = [f"**{disp}**"] + bpp_env, bpp_fa = best_env_for(mkey, "pp512") + btg_env, btg_fa = best_env_for(mkey, "tg128") + row = [f"**{disp}**", win_label(bpp_env, bpp_fa), win_label(btg_env, btg_fa)] for env in ENV_ORDER: row.append(format_cell(chosen[mkey].get(env, {}))) - bpp = best_env_for(mkey, "pp512") - btg = best_env_for(mkey, "tg128") - row.append(f"πŸ† **{WINNER_NAMES[bpp]}**" if bpp else "β€”") - row.append(f"πŸ† **{WINNER_NAMES[btg]}**" if btg else "β€”") print("| " + " | ".join(row) + " |")