Better summary results, including flash attention settings.

2025-08-09 11:58:42 +01:00
parent 995ad2cd38
commit f194848b26
2 changed files with 48 additions and 47 deletions
@@ -159,18 +159,18 @@ Representative LLMs were tested on **AMD Ryzen AI Max “Strix Halo”** across

 PP = prompt processing (tokens/sec prefill), TG = token generation (tokens/sec interactive).

-| Model | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 6.4.2 + ROCWMMA | ROCm 7.0 Beta | ROCm 7.0 RC | 🏆 Best PP | 🏆 Best TG |
+| Model | 🏆 Best PP | 🏆 Best TG | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 6.4.2 + ROCWMMA | ROCm 7.0 Beta | ROCm 7.0 RC |
 |---|---|---|---|---|---|---|---|---|
-| **Gemma3 12B Q8_0** | 677 pp / 14.0 tg | 503 pp / 13.8 tg | 223 pp / 13.8 tg | 230 pp / 13.9 tg | 223 pp / 13.9 tg | 222 pp / 13.9 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** |
-| **Gemma3 27B BF16** | ⚠️ Load Error | 139 pp / 4.0 tg | 84 pp / 4.0 tg | 95 pp / 4.0 tg | 92 pp / 4.0 tg | 83 pp / 4.0 tg | 🏆 **RADV** | 🏆 **ROCm6.4.2+ROCWMMA** |
-| **Llama-4-Scout 17B Q8_0** | 260 pp / 12.2 tg | 172 pp / 12.3 tg | 135 pp / 11.6 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** |
-| **Llama-4-Scout 17B Q4_K XL** | 221 pp / 20.0 tg | 155 pp / 20.0 tg | 138 pp / 17.4 tg | ⚠️ GPU Hang | 139 pp / 17.6 tg | 124 pp / 17.6 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** |
-| **Qwen3 30B BF16** | 108 pp / 8.0 tg | 87 pp / 7.4 tg | 158 pp / 24.3 tg | 162 pp / 24.5 tg | 153 pp / 24.5 tg | 152 pp / 24.6 tg | 🏆 **ROCm6.4.2+ROCWMMA** | 🏆 **ROCm7 RC** |
-| **Qwen3-235B Q3_K XL** | 116 pp / 16.0 tg | 67 pp / 16.8 tg | 74 pp / 13.7 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** |
-| **GLM-4.5-Air-Q4_K_XL** | 202 pp / 22.8 tg | 133 pp / 23.3 tg | 130 pp / 19.4 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | 130 pp / 20.1 tg | 🏆 **AMDVLK** | 🏆 **RADV** |
-| **GLM-4.5-Air-Q6_K_XL** | 225 pp / 16.5 tg | 132 pp / 17.0 tg | 125 pp / 15.3 tg | 114 pp / 15.5 tg | 121 pp / 15.5 tg | 124 pp / 15.5 tg | 🏆 **AMDVLK** | 🏆 **RADV** |
-| **gpt-oss-120b-mxfp4** | 546 pp / 48.1 tg | 255 pp / 49.0 tg | 353 pp / 44.1 tg | 408 pp / 45.0 tg | 355 pp / 45.0 tg | 353 pp / 45.1 tg | 🏆 **AMDVLK** | 🏆 **RADV** |
-| **gpt-oss-20b-mxfp4** | 1473 pp / 68.8 tg | 728 pp / 69.9 tg | 583 pp / 64.5 tg | 649 pp / 64.5 tg | 584 pp / 64.4 tg | 582 pp / 64.5 tg | 🏆 **AMDVLK** | 🏆 **RADV** |
+| **Gemma3 12B Q8_0** | 🏆 **AMDVLK** (FA off) | 🏆 **AMDVLK** (FA off) | 677 pp (FA off) / 14.0 tg (FA off) | 503 pp (FA off) / 13.8 tg (FA off) | 223 pp (FA off) / 13.8 tg (FA off) | 230 pp (FA on) / 13.9 tg (FA off) | 223 pp (FA off) / 13.9 tg (FA off) | 222 pp (FA off) / 13.9 tg (FA off) |
+| **Gemma3 27B BF16** | 🏆 **RADV** (FA on) | 🏆 **ROCm6.4.2+ROCWMMA** (FA off) | ⚠️ Load Error | 139 pp (FA on) / 4.0 tg (FA off) | 84 pp (FA on) / 4.0 tg (FA on) | 95 pp (FA on) / 4.0 tg (FA off) | 92 pp (FA off) / 4.0 tg (FA off) | 83 pp (FA on) / 4.0 tg (FA on) |
+| **Llama-4-Scout 17B Q8_0** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 260 pp (FA on) / 12.2 tg (FA off) | 172 pp (FA on) / 12.3 tg (FA off) | 135 pp (FA off) / 11.6 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error |
+| **Llama-4-Scout 17B Q4_K XL** | 🏆 **AMDVLK** (FA on) | 🏆 **AMDVLK** (FA off) | 221 pp (FA on) / 20.0 tg (FA off) | 155 pp (FA on) / 20.0 tg (FA off) | 138 pp (FA off) / 17.4 tg (FA off) | ⚠️ GPU Hang | 139 pp (FA off) / 17.6 tg (FA off) | 124 pp (FA on) / 17.6 tg (FA on) |
+| **Qwen3 30B BF16** | 🏆 **ROCm6.4.2+ROCWMMA** (FA on) | 🏆 **ROCm7 RC** (FA off) | 108 pp (FA on) / 8.0 tg (FA off) | 87 pp (FA on) / 7.4 tg (FA on) | 158 pp (FA off) / 24.3 tg (FA on) | 162 pp (FA on) / 24.5 tg (FA off) | 153 pp (FA off) / 24.5 tg (FA off) | 152 pp (FA off) / 24.6 tg (FA off) |
+| **Qwen3-235B Q3_K XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 116 pp (FA on) / 16.0 tg (FA off) | 67 pp (FA on) / 16.8 tg (FA on) | 74 pp (FA off) / 13.7 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error |
+| **GLM-4.5-Air-Q4_K_XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 202 pp (FA on) / 22.8 tg (FA on) | 133 pp (FA on) / 23.3 tg (FA on) | 130 pp (FA off) / 19.4 tg (FA off) | ⚠️ GPU Hang | ⚠️ GPU Hang | 130 pp (FA off) / 20.1 tg (FA on) |
+| **GLM-4.5-Air-Q6_K_XL** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA on) | 225 pp (FA on) / 16.5 tg (FA on) | 132 pp (FA on) / 17.0 tg (FA on) | 125 pp (FA off) / 15.3 tg (FA off) | 114 pp (FA off) / 15.5 tg (FA off) | 121 pp (FA off) / 15.5 tg (FA off) | 124 pp (FA off) / 15.5 tg (FA off) |
+| **gpt-oss-120b-mxfp4** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 546 pp (FA on) / 48.1 tg (FA off) | 255 pp (FA on) / 49.0 tg (FA off) | 353 pp (FA off) / 44.1 tg (FA off) | 408 pp (FA on) / 45.0 tg (FA off) | 355 pp (FA off) / 45.0 tg (FA off) | 353 pp (FA off) / 45.1 tg (FA off) |
+| **gpt-oss-20b-mxfp4** | 🏆 **AMDVLK** (FA on) | 🏆 **RADV** (FA off) | 1473 pp (FA on) / 68.8 tg (FA off) | 728 pp (FA on) / 69.9 tg (FA off) | 583 pp (FA off) / 64.5 tg (FA off) | 649 pp (FA on) / 64.5 tg (FA off) | 584 pp (FA off) / 64.4 tg (FA off) | 582 pp (FA off) / 64.5 tg (FA off) |


 **Observations:**
@@ -36,7 +36,6 @@ ERROR_LABEL = {
    "runtime": "⚠️ Runtime Error",
 }

-# Display name → fuzzy key (case/UD/shard-insensitive)
 DEFAULT_MODELS = [
    ("Gemma3 12B Q8_0",            "gemma-3-12b"),
    ("Gemma3 27B BF16",            "gemma-3-27b"),
@@ -54,16 +53,14 @@ SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
 def norm_model(s: str) -> str:
    s = (s or "").lower().replace("_", "-")
    s = SHARD_RE.sub("", s)
-    s = s.replace("-ud", "")  # drop -UD tag for matching
+    s = s.replace("-ud", "")
    return s

-# Load JSON
 raw = json.loads(Path(RESULTS_FILE).read_text(encoding="utf-8"))
 runs = raw["runs"]

-# Bucket rows by (model_key, env, test, fa)
 buckets = defaultdict(list)
-error_only = defaultdict(list)  # (model_key, env) -> [error_type,...] for test=None rows
+error_only = defaultdict(list)
 all_models = set()

 for r in runs:
@@ -72,30 +69,24 @@ for r in runs:
        continue
    mkey = norm_model(r.get("model_clean") or r.get("model") or "")
    all_models.add(mkey)
-    test = r.get("test")  # "pp512", "tg128", or None for pure errors
+    test = r.get("test")
    if test in ("pp512", "tg128"):
        buckets[(mkey, env, test)].append(r)
    else:
-        # capture error-only rows so we can show ⚠️ instead of "—"
        if r.get("error"):
            error_only[(mkey, env)].append(r.get("error_type") or "runtime")

 def pick_best(rows):
-    """Choose the best non-error row by tps_mean; if all error, return an error row."""
-    best = None
-    best_val = -1
-    fallback = None
+    best, best_val, fallback = None, -1, None
    for r in rows:
        if r.get("error"):
            fallback = r
            continue
        v = r.get("tps_mean")
        if isinstance(v, (int, float)) and v > best_val:
-            best_val = v
-            best = r
+            best_val, best = v, r
    return best or fallback

-# Build chosen results per (model, env): {pp: row|None, tg: row|None, err_only: str|None}
 chosen = defaultdict(lambda: defaultdict(dict))
 for (mkey, env, test), rows in buckets.items():
    chosen_row = pick_best(rows)
@@ -103,7 +94,6 @@ for (mkey, env, test), rows in buckets.items():

 for (mkey, env), etypes in error_only.items():
    if etypes:
-        # prefer specific types in a stable order
        if "load" in etypes:
            chosen[mkey][env]["error_only"] = "load"
        elif "hang" in etypes:
@@ -111,42 +101,55 @@ for (mkey, env), etypes in error_only.items():
        else:
            chosen[mkey][env]["error_only"] = "runtime"

+def fa_tag(row):
+    if not row or row.get("error"):
+        return ""
+    fa = row.get("fa")
+    if fa is None:
+        return ""
+    return " (FA on)" if fa else " (FA off)"
+
 def format_cell(entry_dict):
    pp = entry_dict.get("pp512")
    tg = entry_dict.get("tg128")
-
-    # If either chosen row is an error, show that error (web UI behavior)
    for row in (pp, tg):
        if row and row.get("error"):
            return ERROR_LABEL.get(row.get("error_type") or "runtime", "⚠️ Error")
-
-    # If both pp/tg missing but we have an error-only marker, show it
    if not pp and not tg:
        et = entry_dict.get("error_only")
        if et:
            return ERROR_LABEL.get(et, "⚠️ Error")
-        return "—"  # truly absent
-
-    # Otherwise, print available values (partial allowed)
+        return "—"
    def fmt(v):
        return f"{int(round(v))}" if isinstance(v, (int, float)) else "—"
    ppv = pp.get("tps_mean") if pp else None
    tgv = tg.get("tps_mean") if tg else None
-    return f"{fmt(ppv)} pp / {tgv:.1f} tg" if isinstance(tgv, (int, float)) \
-           else f"{fmt(ppv)} pp / — tg"
+    pp_suffix = fa_tag(pp)
+    tg_suffix = fa_tag(tg)
+    if isinstance(tgv, (int, float)):
+        return f"{fmt(ppv)} pp{pp_suffix} / {tgv:.1f} tg{tg_suffix}"
+    else:
+        return f"{fmt(ppv)} pp{pp_suffix} / — tg"

 def best_env_for(mkey, test):
-    best_env, best_val = None, -1
+    best_env, best_val, best_row = None, -1, None
    for env in ENV_ORDER:
        row = chosen[mkey].get(env, {}).get(test)
        if not row or row.get("error"):
            continue
        v = row.get("tps_mean")
        if isinstance(v, (int, float)) and v > best_val:
-            best_env, best_val = env, v
-    return best_env
+            best_env, best_val, best_row = env, v, row
+    return best_env, (best_row.get("fa") if best_row else None)
+
+def win_label(env, fa):
+    if not env:
+        return "—"
+    base = WINNER_NAMES[env]
+    if fa is None:
+        return f"🏆 **{base}**"
+    return f"🏆 **{base}** ({'FA on' if fa else 'FA off'})"

-# Fuzzy match helper
 def find_model_key(fuzzy):
    needle = norm_model(fuzzy)
    for k in all_models:
@@ -154,21 +157,19 @@ def find_model_key(fuzzy):
            return k
    return None

-# Print table
-header = ["Model"] + [COL_NAMES[e] for e in ENV_ORDER] + ["🏆 Best PP", "🏆 Best TG"]
+# Header now has Best PP & Best TG right after Model
+header = ["Model", "🏆 Best PP", "🏆 Best TG"] + [COL_NAMES[e] for e in ENV_ORDER]
 print("| " + " | ".join(header) + " |")
 print("|" + "|".join(["---"] * len(header)) + "|")

 for disp, fuzzy in DEFAULT_MODELS:
    mkey = find_model_key(fuzzy)
    if not mkey:
-        print("| " + " | ".join([f"**{disp}**"] + ["—"]*len(ENV_ORDER) + ["—","—"]) + " |")
+        print("| " + " | ".join([f"**{disp}**", "—", "—"] + ["—"]*len(ENV_ORDER)) + " |")
        continue
-    row = [f"**{disp}**"]
+    bpp_env, bpp_fa = best_env_for(mkey, "pp512")
+    btg_env, btg_fa = best_env_for(mkey, "tg128")
+    row = [f"**{disp}**", win_label(bpp_env, bpp_fa), win_label(btg_env, btg_fa)]
    for env in ENV_ORDER:
        row.append(format_cell(chosen[mkey].get(env, {})))
-    bpp = best_env_for(mkey, "pp512")
-    btg = best_env_for(mkey, "tg128")
-    row.append(f"🏆 **{WINNER_NAMES[bpp]}**" if bpp else "—")
-    row.append(f"🏆 **{WINNER_NAMES[btg]}**" if btg else "—")
    print("| " + " | ".join(row) + " |")