Better summary results, including flash attention settings.

This commit is contained in:
Donato Capitella
2025-08-09 11:58:42 +01:00
parent 995ad2cd38
commit f194848b26
2 changed files with 48 additions and 47 deletions
+11 -11
View File
@@ -159,18 +159,18 @@ Representative LLMs were tested on **AMD Ryzen AI Max “Strix Halo”** across
PP = prompt processing (tokens/sec prefill), TG = token generation (tokens/sec interactive). PP = prompt processing (tokens/sec prefill), TG = token generation (tokens/sec interactive).
| Model | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 6.4.2 + ROCWMMA | ROCm 7.0 Beta | ROCm 7.0 RC | 🏆 Best PP | 🏆 Best TG | | Model | 🏆 Best PP | 🏆 Best TG | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 6.4.2 + ROCWMMA | ROCm 7.0 Beta | ROCm 7.0 RC |
|---|---|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|---|---|
| **Gemma3 12B Q8_0** | 677 pp / 14.0 tg | 503 pp / 13.8 tg | 223 pp / 13.8 tg | 230 pp / 13.9 tg | 223 pp / 13.9 tg | 222 pp / 13.9 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** | | **Gemma3 12B Q8_0** | 🏆 **AMDVLK** (FA off) | 🏆 **AMDVLK** (FA off) | 677 pp (FA off) / 14.0 tg (FA off) | 503 pp (FA off) / 13.8 tg (FA off) | 223 pp (FA off) / 13.8 tg (FA off) | 230 pp (FA on) / 13.9 tg (FA off) | 223 pp (FA off) / 13.9 tg (FA off) | 222 pp (FA off) / 13.9 tg (FA off) |
| **Gemma3 27B BF16** | ⚠️ Load Error | 139 pp / 4.0 tg | 84 pp / 4.0 tg | 95 pp / 4.0 tg | 92 pp / 4.0 tg | 83 pp / 4.0 tg | 🏆 **RADV** | 🏆 **ROCm6.4.2+ROCWMMA** | | **Gemma3 27B BF16** | 🏆 **RADV** (FA on) | 🏆 **ROCm6.4.2+ROCWMMA** (FA off) | ⚠️ Load Error | 139 pp (FA on) / 4.0 tg (FA off) | 84 pp (FA on) / 4.0 tg (FA on) | 95 pp (FA on) / 4.0 tg (FA off) | 92 pp (FA off) / 4.0 tg (FA off) | 83 pp (FA on) / 4.0 tg (FA on) |
| **Llama-4-Scout 17B Q8_0** | 260 pp / 12.2 tg | 172 pp / 12.3 tg | 135 pp / 11.6 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** | | **Llama-4-Scout 17B Q8_0** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 260 pp (FA on) / 12.2 tg (FA off) | 172 pp (FA on) / 12.3 tg (FA off) | 135 pp (FA off) / 11.6 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error |
| **Llama-4-Scout 17B Q4_K XL** | 221 pp / 20.0 tg | 155 pp / 20.0 tg | 138 pp / 17.4 tg | ⚠️ GPU Hang | 139 pp / 17.6 tg | 124 pp / 17.6 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** | | **Llama-4-Scout 17B Q4_K XL** | 🏆 **AMDVLK** (FA on) | 🏆 **AMDVLK** (FA off) | 221 pp (FA on) / 20.0 tg (FA off) | 155 pp (FA on) / 20.0 tg (FA off) | 138 pp (FA off) / 17.4 tg (FA off) | ⚠️ GPU Hang | 139 pp (FA off) / 17.6 tg (FA off) | 124 pp (FA on) / 17.6 tg (FA on) |
| **Qwen3 30B BF16** | 108 pp / 8.0 tg | 87 pp / 7.4 tg | 158 pp / 24.3 tg | 162 pp / 24.5 tg | 153 pp / 24.5 tg | 152 pp / 24.6 tg | 🏆 **ROCm6.4.2+ROCWMMA** | 🏆 **ROCm7 RC** | | **Qwen3 30B BF16** | 🏆 **ROCm6.4.2+ROCWMMA** (FA on) | 🏆 **ROCm7 RC** (FA off) | 108 pp (FA on) / 8.0 tg (FA off) | 87 pp (FA on) / 7.4 tg (FA on) | 158 pp (FA off) / 24.3 tg (FA on) | 162 pp (FA on) / 24.5 tg (FA off) | 153 pp (FA off) / 24.5 tg (FA off) | 152 pp (FA off) / 24.6 tg (FA off) |
| **Qwen3-235B Q3_K XL** | 116 pp / 16.0 tg | 67 pp / 16.8 tg | 74 pp / 13.7 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** | | **Qwen3-235B Q3_K XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 116 pp (FA on) / 16.0 tg (FA off) | 67 pp (FA on) / 16.8 tg (FA on) | 74 pp (FA off) / 13.7 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error |
| **GLM-4.5-Air-Q4_K_XL** | 202 pp / 22.8 tg | 133 pp / 23.3 tg | 130 pp / 19.4 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | 130 pp / 20.1 tg | 🏆 **AMDVLK** | 🏆 **RADV** | | **GLM-4.5-Air-Q4_K_XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 202 pp (FA on) / 22.8 tg (FA on) | 133 pp (FA on) / 23.3 tg (FA on) | 130 pp (FA off) / 19.4 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | 130 pp (FA off) / 20.1 tg (FA on) |
| **GLM-4.5-Air-Q6_K_XL** | 225 pp / 16.5 tg | 132 pp / 17.0 tg | 125 pp / 15.3 tg | 114 pp / 15.5 tg | 121 pp / 15.5 tg | 124 pp / 15.5 tg | 🏆 **AMDVLK** | 🏆 **RADV** | | **GLM-4.5-Air-Q6_K_XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 225 pp (FA on) / 16.5 tg (FA on) | 132 pp (FA on) / 17.0 tg (FA on) | 125 pp (FA off) / 15.3 tg (FA off) | 114 pp (FA off) / 15.5 tg (FA off) | 121 pp (FA off) / 15.5 tg (FA off) | 124 pp (FA off) / 15.5 tg (FA off) |
| **gpt-oss-120b-mxfp4** | 546 pp / 48.1 tg | 255 pp / 49.0 tg | 353 pp / 44.1 tg | 408 pp / 45.0 tg | 355 pp / 45.0 tg | 353 pp / 45.1 tg | 🏆 **AMDVLK** | 🏆 **RADV** | | **gpt-oss-120b-mxfp4** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 546 pp (FA on) / 48.1 tg (FA off) | 255 pp (FA on) / 49.0 tg (FA off) | 353 pp (FA off) / 44.1 tg (FA off) | 408 pp (FA on) / 45.0 tg (FA off) | 355 pp (FA off) / 45.0 tg (FA off) | 353 pp (FA off) / 45.1 tg (FA off) |
| **gpt-oss-20b-mxfp4** | 1473 pp / 68.8 tg | 728 pp / 69.9 tg | 583 pp / 64.5 tg | 649 pp / 64.5 tg | 584 pp / 64.4 tg | 582 pp / 64.5 tg | 🏆 **AMDVLK** | 🏆 **RADV** | | **gpt-oss-20b-mxfp4** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 1473 pp (FA on) / 68.8 tg (FA off) | 728 pp (FA on) / 69.9 tg (FA off) | 583 pp (FA off) / 64.5 tg (FA off) | 649 pp (FA on) / 64.5 tg (FA off) | 584 pp (FA off) / 64.4 tg (FA off) | 582 pp (FA off) / 64.5 tg (FA off) |
**Observations:** **Observations:**
+37 -36
View File
@@ -36,7 +36,6 @@ ERROR_LABEL = {
"runtime": "⚠️ Runtime Error", "runtime": "⚠️ Runtime Error",
} }
# Display name → fuzzy key (case/UD/shard-insensitive)
DEFAULT_MODELS = [ DEFAULT_MODELS = [
("Gemma3 12B Q8_0", "gemma-3-12b"), ("Gemma3 12B Q8_0", "gemma-3-12b"),
("Gemma3 27B BF16", "gemma-3-27b"), ("Gemma3 27B BF16", "gemma-3-27b"),
@@ -54,16 +53,14 @@ SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
def norm_model(s: str) -> str: def norm_model(s: str) -> str:
s = (s or "").lower().replace("_", "-") s = (s or "").lower().replace("_", "-")
s = SHARD_RE.sub("", s) s = SHARD_RE.sub("", s)
s = s.replace("-ud", "") # drop -UD tag for matching s = s.replace("-ud", "")
return s return s
# Load JSON
raw = json.loads(Path(RESULTS_FILE).read_text(encoding="utf-8")) raw = json.loads(Path(RESULTS_FILE).read_text(encoding="utf-8"))
runs = raw["runs"] runs = raw["runs"]
# Bucket rows by (model_key, env, test, fa)
buckets = defaultdict(list) buckets = defaultdict(list)
error_only = defaultdict(list) # (model_key, env) -> [error_type,...] for test=None rows error_only = defaultdict(list)
all_models = set() all_models = set()
for r in runs: for r in runs:
@@ -72,30 +69,24 @@ for r in runs:
continue continue
mkey = norm_model(r.get("model_clean") or r.get("model") or "") mkey = norm_model(r.get("model_clean") or r.get("model") or "")
all_models.add(mkey) all_models.add(mkey)
test = r.get("test") # "pp512", "tg128", or None for pure errors test = r.get("test")
if test in ("pp512", "tg128"): if test in ("pp512", "tg128"):
buckets[(mkey, env, test)].append(r) buckets[(mkey, env, test)].append(r)
else: else:
# capture error-only rows so we can show ⚠️ instead of "—"
if r.get("error"): if r.get("error"):
error_only[(mkey, env)].append(r.get("error_type") or "runtime") error_only[(mkey, env)].append(r.get("error_type") or "runtime")
def pick_best(rows): def pick_best(rows):
"""Choose the best non-error row by tps_mean; if all error, return an error row.""" best, best_val, fallback = None, -1, None
best = None
best_val = -1
fallback = None
for r in rows: for r in rows:
if r.get("error"): if r.get("error"):
fallback = r fallback = r
continue continue
v = r.get("tps_mean") v = r.get("tps_mean")
if isinstance(v, (int, float)) and v > best_val: if isinstance(v, (int, float)) and v > best_val:
best_val = v best_val, best = v, r
best = r
return best or fallback return best or fallback
# Build chosen results per (model, env): {pp: row|None, tg: row|None, err_only: str|None}
chosen = defaultdict(lambda: defaultdict(dict)) chosen = defaultdict(lambda: defaultdict(dict))
for (mkey, env, test), rows in buckets.items(): for (mkey, env, test), rows in buckets.items():
chosen_row = pick_best(rows) chosen_row = pick_best(rows)
@@ -103,7 +94,6 @@ for (mkey, env, test), rows in buckets.items():
for (mkey, env), etypes in error_only.items(): for (mkey, env), etypes in error_only.items():
if etypes: if etypes:
# prefer specific types in a stable order
if "load" in etypes: if "load" in etypes:
chosen[mkey][env]["error_only"] = "load" chosen[mkey][env]["error_only"] = "load"
elif "hang" in etypes: elif "hang" in etypes:
@@ -111,42 +101,55 @@ for (mkey, env), etypes in error_only.items():
else: else:
chosen[mkey][env]["error_only"] = "runtime" chosen[mkey][env]["error_only"] = "runtime"
def fa_tag(row):
    """Return the flash-attention suffix to append to a result value.

    Args:
        row: A chosen result row (a dict), or None when no row was selected.

    Returns:
        " (FA on)" when the row's "fa" value is truthy, " (FA off)" when it
        is falsy but present, and "" when there is no row, the row is marked
        as an error, or the row has no "fa" value.
    """
    # Error rows and missing rows carry no meaningful FA setting to report.
    if not row or row.get("error"):
        return ""
    fa = row.get("fa")
    # Distinguish "not recorded" (None) from an explicit falsy value such as 0.
    if fa is None:
        return ""
    return " (FA on)" if fa else " (FA off)"
def format_cell(entry_dict): def format_cell(entry_dict):
pp = entry_dict.get("pp512") pp = entry_dict.get("pp512")
tg = entry_dict.get("tg128") tg = entry_dict.get("tg128")
# If either chosen row is an error, show that error (web UI behavior)
for row in (pp, tg): for row in (pp, tg):
if row and row.get("error"): if row and row.get("error"):
return ERROR_LABEL.get(row.get("error_type") or "runtime", "⚠️ Error") return ERROR_LABEL.get(row.get("error_type") or "runtime", "⚠️ Error")
# If both pp/tg missing but we have an error-only marker, show it
if not pp and not tg: if not pp and not tg:
et = entry_dict.get("error_only") et = entry_dict.get("error_only")
if et: if et:
return ERROR_LABEL.get(et, "⚠️ Error") return ERROR_LABEL.get(et, "⚠️ Error")
return "" # truly absent return ""
# Otherwise, print available values (partial allowed)
def fmt(v): def fmt(v):
return f"{int(round(v))}" if isinstance(v, (int, float)) else "" return f"{int(round(v))}" if isinstance(v, (int, float)) else ""
ppv = pp.get("tps_mean") if pp else None ppv = pp.get("tps_mean") if pp else None
tgv = tg.get("tps_mean") if tg else None tgv = tg.get("tps_mean") if tg else None
return f"{fmt(ppv)} pp / {tgv:.1f} tg" if isinstance(tgv, (int, float)) \ pp_suffix = fa_tag(pp)
else f"{fmt(ppv)} pp / — tg" tg_suffix = fa_tag(tg)
if isinstance(tgv, (int, float)):
return f"{fmt(ppv)} pp{pp_suffix} / {tgv:.1f} tg{tg_suffix}"
else:
return f"{fmt(ppv)} pp{pp_suffix} / — tg"
def best_env_for(mkey, test): def best_env_for(mkey, test):
best_env, best_val = None, -1 best_env, best_val, best_row = None, -1, None
for env in ENV_ORDER: for env in ENV_ORDER:
row = chosen[mkey].get(env, {}).get(test) row = chosen[mkey].get(env, {}).get(test)
if not row or row.get("error"): if not row or row.get("error"):
continue continue
v = row.get("tps_mean") v = row.get("tps_mean")
if isinstance(v, (int, float)) and v > best_val: if isinstance(v, (int, float)) and v > best_val:
best_env, best_val = env, v best_env, best_val, best_row = env, v, row
return best_env return best_env, (best_row.get("fa") if best_row else None)
def win_label(env, fa):
    """Format the "best backend" cell for a summary table row.

    Args:
        env: Key of the winning environment, looked up in WINNER_NAMES;
            a falsy value means no winner was found.
        fa: The "fa" value of the winning row, or None when it is unknown.

    Returns:
        "" when there is no winner; otherwise the trophy-prefixed, bolded
        display name, followed by "(FA on)" / "(FA off)" when ``fa`` is not
        None.
    """
    if not env:
        return ""
    base = WINNER_NAMES[env]
    # Omit the FA annotation entirely when the setting was not recorded.
    if fa is None:
        return f"🏆 **{base}**"
    return f"🏆 **{base}** ({'FA on' if fa else 'FA off'})"
# Fuzzy match helper
def find_model_key(fuzzy): def find_model_key(fuzzy):
needle = norm_model(fuzzy) needle = norm_model(fuzzy)
for k in all_models: for k in all_models:
@@ -154,21 +157,19 @@ def find_model_key(fuzzy):
return k return k
return None return None
# Print table # Header now has Best PP & Best TG right after Model
header = ["Model"] + [COL_NAMES[e] for e in ENV_ORDER] + ["🏆 Best PP", "🏆 Best TG"] header = ["Model", "🏆 Best PP", "🏆 Best TG"] + [COL_NAMES[e] for e in ENV_ORDER]
print("| " + " | ".join(header) + " |") print("| " + " | ".join(header) + " |")
print("|" + "|".join(["---"] * len(header)) + "|") print("|" + "|".join(["---"] * len(header)) + "|")
for disp, fuzzy in DEFAULT_MODELS: for disp, fuzzy in DEFAULT_MODELS:
mkey = find_model_key(fuzzy) mkey = find_model_key(fuzzy)
if not mkey: if not mkey:
print("| " + " | ".join([f"**{disp}**"] + [""]*len(ENV_ORDER) + ["",""]) + " |") print("| " + " | ".join([f"**{disp}**", "", ""] + [""]*len(ENV_ORDER)) + " |")
continue continue
row = [f"**{disp}**"] bpp_env, bpp_fa = best_env_for(mkey, "pp512")
btg_env, btg_fa = best_env_for(mkey, "tg128")
row = [f"**{disp}**", win_label(bpp_env, bpp_fa), win_label(btg_env, btg_fa)]
for env in ENV_ORDER: for env in ENV_ORDER:
row.append(format_cell(chosen[mkey].get(env, {}))) row.append(format_cell(chosen[mkey].get(env, {})))
bpp = best_env_for(mkey, "pp512")
btg = best_env_for(mkey, "tg128")
row.append(f"🏆 **{WINNER_NAMES[bpp]}**" if bpp else "")
row.append(f"🏆 **{WINNER_NAMES[btg]}**" if btg else "")
print("| " + " | ".join(row) + " |") print("| " + " | ".join(row) + " |")