updated benchmarks

2025-11-17 23:02:56 +00:00
parent ad32126872
commit 1d6d48fae1
192 changed files with 13571 additions and 107 deletions
@@ -0,0 +1 @@
+__pycache__
@@ -2,7 +2,10 @@
 import re, glob, os, json, time
 from pathlib import Path

-RESULTS_DIR = "results"
+RESULT_SOURCES = [
+    ("results", False),       # regular single-node runs
+    ("results-rpc", True),    # distributed RPC runs across two servers
+]
 OUT_JSON = "../docs/results.json"

 # --- Regexes ---------------------------------------------------------------
@@ -39,23 +42,39 @@ LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE)

 # --- Helpers ---------------------------------------------------------------

+ENV_CANON = {
+    "rocm7_1": "rocm7.1",
+}
+
 def clean_model_name(raw):
    """Return raw with every SHARD_RE match stripped out."""
    return SHARD_RE.sub("", raw)

+def canonicalize_env(env):
+    """
+    Rewrite env using ENV_CANON; falsy or unmatched values pass through.
+    """
+    if env:
+        for alias, canonical in ENV_CANON.items():
+            # Match the bare alias or "<alias>-<variant>"; keep the variant tail.
+            if env == alias or env.startswith(alias + "-"):
+                return canonical + env[len(alias):]
+    return env
+
 def parse_env_flags(basename):
    """
-    pattern: <model>__<env>[__fa1][__hblt0][__longctx32768]
-    Returns (env, fa, context_tag, context_tokens)
+    pattern: <model>__<env>[__fa1][__hblt0][__longctx32768][__rpc]
+    Returns (env, fa, context_tag, context_tokens, rpc_flag)
    """
    parts = basename.split("__")
    if len(parts) < 2:
-        return None, False, "default", None
+        return None, False, "default", None, False

    env = parts[1]
    fa = False
    context_tag = "default"
    context_tokens = None
+    rpc_flag = False

    for raw_suffix in parts[2:]:
        suffix = raw_suffix.lower()
@@ -71,8 +90,10 @@ def parse_env_flags(basename):
                    context_tokens = int(m.group(1))
                except ValueError:
                    context_tokens = None
+        elif suffix == "rpc":
+            rpc_flag = True

-    return env, fa, context_tag, context_tokens
+    return env, fa, context_tag, context_tokens, rpc_flag

 def env_base_and_variant(env):
    # e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma")
@@ -148,111 +169,115 @@ runs = []
 builds = set()
 envs  = set()

-for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
-    base = os.path.basename(path).rsplit(".log", 1)[0]
-    if "__" not in base:
-        continue
+for results_dir, is_rpc_source in RESULT_SOURCES:
+    glob_pattern = os.path.join(results_dir, "*.log")
+    for path in sorted(glob.glob(glob_pattern)):
+        base = os.path.basename(path).rsplit(".log", 1)[0]
+        if "__" not in base:
+            continue

-    model_raw, _rest = base.split("__", 1)
-    env, fa_from_name, context_tag, context_tokens = parse_env_flags(base)
-    if env:
-        envs.add(env)
+        model_raw, _rest = base.split("__", 1)
+        env, fa_from_name, context_tag, context_tokens, rpc_flag = parse_env_flags(base)
+        env = canonicalize_env(env)
+        if env:
+            envs.add(env)

-    model_clean = clean_model_name(model_raw)
+        model_clean = clean_model_name(model_raw)

-    with open(path, errors="ignore") as f:
-        text = f.read()
+        with open(path, errors="ignore") as f:
+            text = f.read()

-    # build info (take the last match in file if many)
-    build_hash, build_num = None, None
-    for m in BUILD_RE.finditer(text):
-        build_hash, build_num = m.group(1), m.group(2)
-    if build_hash:
-        builds.add((build_hash, build_num))
+        # build info (take the last match in file if many)
+        build_hash, build_num = None, None
+        for m in BUILD_RE.finditer(text):
+            build_hash, build_num = m.group(1), m.group(2)
+        if build_hash:
+            builds.add((build_hash, build_num))

-    # detect error (if there is no valid table rows)
-    table_rows = parse_table(text)
+        # detect error (if there is no valid table rows)
+        table_rows = parse_table(text)

-    # If table rows exist, we’ll still mark errors only if no perf found
-    has_pp = any(r.get("test","").lower()=="pp512" for r in table_rows)
-    has_tg = any(r.get("test","").lower()=="tg128" for r in table_rows)
-    error, etype = (False, None)
-    if not (has_pp or has_tg):
-        error, etype = detect_error(text)
+        # If table rows exist, we’ll still mark errors only if no perf found
+        has_pp = any(r.get("test","").lower()=="pp512" for r in table_rows)
+        has_tg = any(r.get("test","").lower()=="tg128" for r in table_rows)
+        error, etype = (False, None)
+        if not (has_pp or has_tg):
+            error, etype = detect_error(text)

-    # Determine FA flag:
-    #   prefer explicit column "fa" if present, else fallback to filename "__fa1"
-    fa_in_table = None
-    for r in table_rows:
-        if "fa" in r:
-            try:
-                fa_in_table = int(r["fa"]) == 1
-            except:
-                fa_in_table = None
-            break
-    fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name
+        # Determine FA flag:
+        #   prefer explicit column "fa" if present, else fallback to filename "__fa1"
+        fa_in_table = None
+        for r in table_rows:
+            if "fa" in r:
+                try:
+                    fa_in_table = int(r["fa"]) == 1
+                except:
+                    fa_in_table = None
+                break
+        fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name

-    # Normalize env base / variant (e.g., rocwmma)
-    env_base, env_variant = env_base_and_variant(env)
+        # Normalize env base / variant (e.g., rocwmma)
+        env_base, env_variant = env_base_and_variant(env)

-    # Emit one run per row (pp512 / tg128)
-    for r in table_rows or [{}]:
-        test = r.get("test", "").lower() if table_rows else None
-        tps_mean, tps_std = None, None
-        if table_rows:
-            ts_field = r.get("t/s", "")
-            m = TS_RE.search(ts_field)
-            if m:
-                tps_mean = coerce_float(m.group(1))
-                tps_std  = coerce_float(m.group(2))
+        # Emit one run per row (pp512 / tg128)
+        for r in table_rows or [{}]:
+            test = r.get("test", "").lower() if table_rows else None
+            tps_mean, tps_std = None, None
+            if table_rows:
+                ts_field = r.get("t/s", "")
+                m = TS_RE.search(ts_field)
+                if m:
+                    tps_mean = coerce_float(m.group(1))
+                    tps_std  = coerce_float(m.group(2))

-        # parse numeric helpers from row (if present)
-        params_b = None
-        file_size_gib = None
-        if "params" in r:
-            pm = PARAMS_RE.search(r["params"])
-            if pm:
-                params_b = coerce_float(pm.group(1).replace(",", ""))
-        if "size" in r:
-            sm = GIB_RE.search(r["size"])
-            if sm:
-                file_size_gib = coerce_float(sm.group(1).replace(",", ""))
+            # parse numeric helpers from row (if present)
+            params_b = None
+            file_size_gib = None
+            if "params" in r:
+                pm = PARAMS_RE.search(r["params"])
+                if pm:
+                    params_b = coerce_float(pm.group(1).replace(",", ""))
+            if "size" in r:
+                sm = GIB_RE.search(r["size"])
+                if sm:
+                    file_size_gib = coerce_float(sm.group(1).replace(",", ""))

-        # quant from model name (unchanged)
-        quant = extract_quant(model_clean)
+            # quant from model name (unchanged)
+            quant = extract_quant(model_clean)

-        # name_params_b: prefer table value; else fall back to B in model name
-        name_params_b = params_b if params_b is not None else b_from_name(model_clean)
+            # name_params_b: prefer table value; else fall back to B in model name
+            name_params_b = params_b if params_b is not None else b_from_name(model_clean)

-        backend = r.get("backend")
-        ngl = r.get("ngl")
-        mmap = r.get("mmap")
+            backend = r.get("backend")
+            ngl = r.get("ngl")
+            mmap = r.get("mmap")

-        run = {
-            "model": model_raw,
-            "model_clean": model_clean,
-            "env": env,
-            "env_base": env_base,
-            "env_variant": env_variant,         # e.g. "rocwmma"
-            "fa": bool(fa_enabled),
-            "context": context_tag or "default",
-            "context_tokens": context_tokens,
-            "test": test,                       # "pp512" | "tg128" | None (if error)
-            "tps_mean": tps_mean,
-            "tps_std": tps_std,
-            "error": bool(error),
-            "error_type": etype,                # "load" | "hang" | "runtime" | None
-            "backend": backend,
-            "ngl": (int(ngl) if (ngl and ngl.isdigit()) else None),
-            "mmap": (int(mmap) if (mmap and mmap.isdigit()) else None),
-            "params_b": params_b,               # from table, if available
-            "file_size_gib": file_size_gib,     # from table, if available
-            "name_params_b": name_params_b,     # parsed from model name (e.g., 30B -> 30.0)
-            "quant": quant,
-            "log": path,
-            "build": {"hash": build_hash, "number": build_num} if build_hash else None,
-        }
-        runs.append(run)
+            run = {
+                "model": model_raw,
+                "model_clean": model_clean,
+                "env": env,
+                "env_base": env_base,
+                "env_variant": env_variant,         # e.g. "rocwmma"
+                "fa": bool(fa_enabled),
+                "context": context_tag or "default",
+                "context_tokens": context_tokens,
+                "test": test,                       # "pp512" | "tg128" | None (if error)
+                "tps_mean": tps_mean,
+                "tps_std": tps_std,
+                "error": bool(error),
+                "error_type": etype,                # "load" | "hang" | "runtime" | None
+                "backend": backend,
+                "ngl": (int(ngl) if (ngl and ngl.isdigit()) else None),
+                "mmap": (int(mmap) if (mmap and mmap.isdigit()) else None),
+                "params_b": params_b,               # from table, if available
+                "file_size_gib": file_size_gib,     # from table, if available
+                "name_params_b": name_params_b,     # parsed from model name (e.g., 30B -> 30.0)
+                "quant": quant,
+                "log": path,
+                "rpc": bool(is_rpc_source or rpc_flag),
+                "build": {"hash": build_hash, "number": build_num} if build_hash else None,
+            }
+            runs.append(run)

 # Meta
 meta = {
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         64.83 ± 0.23 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.69 ± 0.01 |
+
+build: caca0d55c (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         50.19 ± 0.10 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.73 ± 0.01 |
+
+build: caca0d55c (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         66.04 ± 0.17 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.73 ± 0.01 |
+
+build: 86f1f4411 (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         50.78 ± 0.06 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.72 ± 0.00 |
+
+build: 86f1f4411 (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         62.70 ± 0.13 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.71 ± 0.01 |
+
+build: f1840a25d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         45.01 ± 0.11 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.73 ± 0.01 |
+
+build: f1840a25d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         65.83 ± 0.13 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.72 ± 0.02 |
+
+build: 677be4d78 (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         42.96 ± 0.13 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.73 ± 0.01 |
+
+build: 677be4d78 (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         61.63 ± 0.11 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.92 ± 0.01 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         36.76 ± 0.06 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.93 ± 0.01 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         66.33 ± 0.03 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          9.04 ± 0.01 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         30.17 ± 0.09 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          9.05 ± 0.01 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,21 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:858: Remote RPC server crashed or returned malformed response
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fd18621c565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fd18621c92b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fd18621caaf]
+/usr/local/lib64/libggml-rpc.so.0(+0xa195) [0x7fd1862ca195]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fd186236de3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fd189269650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fd18926b2e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fd1892701bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fd18927100e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x407edc]
+/lib64/libc.so.6(+0x35b5) [0x7fd185bb25b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7fd185bb2668]
+/usr/local/bin/llama-bench() [0x409255]
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         31.42 ± 0.09 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          9.08 ± 0.01 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         63.23 ± 0.18 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.70 ± 0.01 |
+
+build: b447a9a4b (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         30.12 ± 0.09 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.72 ± 0.01 |
+
+build: b447a9a4b (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         63.17 ± 0.13 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.72 ± 0.01 |
+
+build: fa5c85a8b (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         36.22 ± 0.08 |
+| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.71 ± 0.01 |
+
+build: fa5c85a8b (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        172.03 ± 0.73 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.02 ± 0.02 |
+
+build: caca0d55c (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        174.52 ± 1.29 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.94 ± 0.07 |
+
+build: caca0d55c (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        171.93 ± 1.16 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.06 ± 0.03 |
+
+build: 86f1f4411 (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        174.45 ± 1.01 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.02 ± 0.03 |
+
+build: 86f1f4411 (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        170.89 ± 0.37 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.89 ± 0.08 |
+
+build: f1840a25d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        173.53 ± 1.57 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.92 ± 0.05 |
+
+build: f1840a25d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        172.01 ± 0.91 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.95 ± 0.04 |
+
+build: 677be4d78 (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        173.90 ± 0.67 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.95 ± 0.03 |
+
+build: 677be4d78 (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        142.82 ± 0.78 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.85 ± 0.08 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        144.47 ± 1.03 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.86 ± 0.03 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        143.05 ± 1.08 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.00 ± 0.05 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        145.60 ± 1.03 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.94 ± 0.02 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        144.59 ± 0.50 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.01 ± 0.03 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        146.21 ± 2.31 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.05 ± 0.10 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        170.42 ± 0.65 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.88 ± 0.06 |
+
+build: b447a9a4b (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        174.42 ± 0.52 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.87 ± 0.12 |
+
+build: b447a9a4b (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        171.34 ± 0.97 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.97 ± 0.01 |
+
+build: fa5c85a8b (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        173.98 ± 1.06 |
+| minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.94 ± 0.03 |
+
+build: fa5c85a8b (7085)
@@ -0,0 +1,18 @@
+ggml_vulkan: Found 1 Vulkan devices:
+ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:690: Remote RPC server crashed or returned malformed response
+/lib64/libggml-base.so.0(+0x3565) [0x7f5d2cbe9565]
+/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5d2cbe992b]
+/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5d2cbe9aaf]
+/lib64/libggml-rpc.so.0(+0x452a) [0x7f5d2fb8252a]
+/lib64/libggml-base.so.0(+0x16232) [0x7f5d2cbfc232]
+/lib64/libggml-base.so.0(ggml_backend_alloc_ctx_tensors_from_buft+0xff) [0x7f5d2cbfdf1f]
+/lib64/libllama.so.0(_ZN11llama_model12load_tensorsER18llama_model_loader+0x3a26) [0x7f5d2fdaad06]
+/lib64/libllama.so.0(+0x1cf16) [0x7f5d2fd11f16]
+/lib64/libllama.so.0(llama_model_load_from_file+0xac) [0x7f5d2fd12d7c]
+/usr/sbin/llama-bench() [0x406d85]
+/lib64/libc.so.6(+0x35b5) [0x7f5d2c57f5b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5d2c57f668]
+/usr/sbin/llama-bench() [0x409255]
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        167.68 ± 0.26 |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         22.67 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,20 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         22.85 ± 0.00 |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f5f7bd95565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5f7bd9592b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5f7bd95aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f5f7f04eeb2]
+/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f5f7f054034]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f5f7bdac8ce]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f5f7f70a950]
+/usr/local/bin/llama-bench() [0x408242]
+/lib64/libc.so.6(+0x35b5) [0x7f5f7b72b5b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5f7b72b668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        170.65 ± 0.11 |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         22.54 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         14.57 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          1.38 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        171.42 ± 0.59 |
+| glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         22.69 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,24 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f2015391565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f201539192b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f2015391aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f2017d0af12]
+/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f2017d12a66]
+/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f2017d0ffcf]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20153abde3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20183de650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20183e02e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20183e51bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20183e600e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x407edc]
+/lib64/libc.so.6(+0x35b5) [0x7f2014d275b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2014d27668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        147.75 ± 0.96 |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.69 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         22.08 ± 0.00 |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          7.05 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        140.67 ± 0.37 |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.59 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         14.60 ± 0.00 |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          1.34 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        151.03 ± 0.71 |
+| glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.69 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,28 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f1a5d310565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a5d31092b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a5d310aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f1a5fc89f12]
+/usr/local/lib64/libggml-hip.so.0(+0x28ce0d7) [0x7f1a5fc9b0d7]
+/usr/local/lib64/libggml-hip.so.0(+0x28cccd1) [0x7f1a5fc99cd1]
+/usr/local/lib64/libggml-hip.so.0(+0x28cb92c) [0x7f1a5fc9892c]
+/usr/local/lib64/libggml-hip.so.0(+0x28c645a) [0x7f1a5fc9345a]
+/usr/local/lib64/libggml-hip.so.0(+0x28c2f0a) [0x7f1a5fc8ff0a]
+/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f1a5fc8efcf]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f1a5d32ade3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1a6035d650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f1a6035f2e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f1a603641bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f1a6036500e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x407edc]
+/lib64/libc.so.6(+0x35b5) [0x7f1a5cca65b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a5cca6668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           pp512 |        101.51 ± 0.07 |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           tg128 |          2.79 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         19.96 ± 0.00 |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          2.46 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           pp512 |        103.07 ± 0.08 |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           tg128 |          2.78 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,20 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         12.71 ± 0.00 |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f295ddb7565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f295ddb792b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f295ddb7aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f2960686fb2]
+/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f296068c004]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f295ddce8ce]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f2960d2e950]
+/usr/local/bin/llama-bench() [0x408242]
+/lib64/libc.so.6(+0x35b5) [0x7f295d74d5b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f295d74d668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           pp512 |        102.84 ± 0.31 |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           tg128 |          2.79 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         34.86 ± 0.00 |
+| llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          2.28 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,24 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f39038cd565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f39038cd92b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f39038cdaaf]
+/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f3906b86eb2]
+/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f3906b8eb36]
+/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f3906b8c09f]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f39038e7de3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f3907243650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f39072452e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f390724a1bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f390724b00e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x407edc]
+/lib64/libc.so.6(+0x35b5) [0x7f39032635b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3903263668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        152.66 ± 0.00 |
+| llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          9.29 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        274.07 ± 3.25 |
+| llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         15.13 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,20 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        109.44 ± 0.00 |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f3efb9fa565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3efb9fa92b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3efb9faaaf]
+/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f3efe2c9fb2]
+/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f3efe2cf004]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3efba118ce]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3efe971950]
+/usr/local/bin/llama-bench() [0x408242]
+/lib64/libc.so.6(+0x35b5) [0x7f3efb3905b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3efb390668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,24 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f20b4ffb565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f20b4ffb92b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f20b4ffbaaf]
+/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f20b7974f12]
+/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f20b797ca66]
+/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f20b7979fcf]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20b5015de3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20b8048650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20b804a2e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20b804f1bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20b805000e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x407edc]
+/lib64/libc.so.6(+0x35b5) [0x7f20b49915b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f20b4991668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0)
@@ -0,0 +1,24 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fe4591ff565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fe4591ff92b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fe4591ffaaf]
+/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fe45bb78f12]
+/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fe45bb80a66]
+/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fe45bb7dfcf]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fe459219de3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fe45c24c650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fe45c24e2e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fe45c2531bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fe45c25400e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x407edc]
+/lib64/libc.so.6(+0x35b5) [0x7fe458b955b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7fe458b95668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        271.67 ± 1.52 |
+| llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         12.13 ± 0.05 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,20 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        153.04 ± 0.00 |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f0845525565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f084552592b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f0845525aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f08487deeb2]
+/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f08487e4034]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f084553c8ce]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f0848e9a950]
+/usr/local/bin/llama-bench() [0x408242]
+/lib64/libc.so.6(+0x35b5) [0x7f0844ebb5b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0844ebb668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        269.91 ± 0.99 |
+| llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         12.11 ± 0.05 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        107.41 ± 0.00 |
+| llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          7.67 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,24 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f6a6bb84565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f6a6bb8492b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f6a6bb84aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f6a6e4fdf12]
+/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f6a6e505a66]
+/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f6a6e502fcf]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f6a6bb9ede3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f6a6ebd1650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f6a6ebd32e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f6a6ebd81bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f6a6ebd900e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x40816d]
+/lib64/libc.so.6(+0x35b5) [0x7f6a6b51a5b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f6a6b51a668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 failed (exit 0)
@@ -0,0 +1,24 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fa8c83e4565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa8c83e492b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fa8c83e4aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fa8cad5df12]
+/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fa8cad65a66]
+/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fa8cad62fcf]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fa8c83fede3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa8cb431650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fa8cb4332e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fa8cb4381bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fa8cb43900e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x408087]
+/lib64/libc.so.6(+0x35b5) [0x7fa8c7d7a5b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa8c7d7a668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        312.46 ± 3.80 |
+| llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         19.50 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,24 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f68ae79e565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f68ae79e92b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f68ae79eaaf]
+/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f68b1a57eb2]
+/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f68b1a5fb36]
+/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f68b1a5d09f]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f68ae7b8de3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f68b2114650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f68b21162e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f68b211b1bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f68b211c00e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x407edc]
+/lib64/libc.so.6(+0x35b5) [0x7f68ae1345b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f68ae134668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        313.81 ± 0.68 |
+| llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         19.48 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        109.58 ± 0.00 |
+| llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         10.31 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        315.62 ± 2.64 |
+| llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         19.51 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,24 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7effceeac565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7effceeac92b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7effceeacaaf]
+/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7effd1825f12]
+/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7effd182da66]
+/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7effd182afcf]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7effceec6de3]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7effd1ef9650]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7effd1efb2e2]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7effd1f001bf]
+/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7effd1f0100e]
+/usr/local/bin/llama-bench() [0x40a3db]
+/usr/local/bin/llama-bench() [0x408087]
+/lib64/libc.so.6(+0x35b5) [0x7effce8425b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7effce842668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           pp512 |        140.40 ± 0.48 |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           tg128 |         15.93 ± 0.23 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         49.58 ± 0.00 |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          9.43 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           pp512 |        142.52 ± 0.12 |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.13 ± 0.05 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,29 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         29.46 ± 0.00 |
+:0:rocdevice.cpp            :3588: 50932421658 us:  Callback: Queue 0x7f8e6a000000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29
+Kernel Name: _ZL18flash_attn_ext_vecILi128ELi1EL9ggml_type1ELS0_1ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiiiiiiiiiiiiiliiliiiiil
+VGPU=0x94e06a0 SWq=0x7f8e6cbea000, HWq=0x7f8e6a000000, id=2
+	Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0
+	grid=[32, 68, 64], workgroup=[32, 4, 1]
+	private_seg_size=0, group_seg_size=4352
+	kernel_obj=0x7f8e6a78f180, kernarg_address=0x0x7f738bd49400
+	completion_signal=0x0, correlation_id=0
+	rptr=1368490, wptr=1369554
+ /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f8e79498565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8e7949892b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f8e79498aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f8e7bd67fb2]
+/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f8e7bd6d004]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f8e794af8ce]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f8e7c40f950]
+/usr/local/bin/llama-bench() [0x408242]
+/lib64/libc.so.6(+0x35b5) [0x7f8e78e2e5b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8e78e2e668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           pp512 |        140.69 ± 0.99 |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.07 ± 0.05 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         38.47 ± 0.00 |
+| qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          9.20 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        352.23 ± 9.28 |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         27.04 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        192.75 ± 0.00 |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         19.17 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |       345.22 ± 23.61 |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         26.84 ± 0.40 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,20 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        135.26 ± 0.00 |
+/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
+/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f83b9245565]
+/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f83b924592b]
+/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f83b9245aaf]
+/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f83bbb14fb2]
+/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f83bbb1a004]
+/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f83b925c8ce]
+/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f83bc1bc950]
+/usr/local/bin/llama-bench() [0x408242]
+/lib64/libc.so.6(+0x35b5) [0x7f83b8bdb5b5]
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f83b8bdb668]
+/usr/local/bin/llama-bench() [0x409255]
+✖ ! [rocm-7alpha-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        360.93 ± 3.44 |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         27.17 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        197.49 ± 0.00 |
+| qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         19.17 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |       579.57 ± 12.23 |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         58.33 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        202.50 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         30.86 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        575.31 ± 5.34 |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         58.66 ± 0.01 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        145.86 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          8.72 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        576.33 ± 7.18 |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         58.48 ± 0.01 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        160.69 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         30.79 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        669.29 ± 4.01 |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         71.10 ± 0.01 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        204.78 ± 0.00 |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         33.71 ± 0.00 |
+
+build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        666.63 ± 5.54 |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         71.62 ± 0.02 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        148.47 ± 0.00 |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          8.94 ± 0.00 |
+
+build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        676.38 ± 1.86 |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         71.44 ± 0.02 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        160.70 ± 0.00 |
+| qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         33.64 ± 0.00 |
+
+build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q8_0          |  33.51 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        602.73 ± 3.88 |
+| qwen3moe 30B.A3B Q8_0          |  33.51 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         45.21 ± 0.01 |
+
+build: 12bb5c37 (7074)
--- a/Show More
+++ b/Show More