updated benchmarks

2025-11-17 23:02:56 +00:00
parent ad32126872
commit 1d6d48fae1
192 changed files with 13571 additions and 107 deletions
@@ -0,0 +1 @@
 __pycache__
@@ -2,7 +2,10 @@
 import re, glob, os, json, time
 from pathlib import Path
-RESULTS_DIR = "results"
+RESULT_SOURCES = [
    ("results", False),       # regular single-node runs
    ("results-rpc", True),    # distributed RPC runs across two servers
 ]
 OUT_JSON = "../docs/results.json"
 # --- Regexes ---------------------------------------------------------------
@@ -39,23 +42,39 @@ LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE)
 # --- Helpers ---------------------------------------------------------------
 # Raw env tag -> canonical env name. canonicalize_env() consults this table
 # for exact matches and for tags of the form "<raw>-<variant>".
 ENV_CANON = {
    "rocm7_1": "rocm7.1",
 }
 def clean_model_name(raw):
    """Return *raw* with every ``SHARD_RE`` match replaced by an empty string."""
    return SHARD_RE.sub("", raw)
 def canonicalize_env(env):
    """Map a raw environment tag onto its canonical spelling.

    An exact match against an ``ENV_CANON`` key yields the canonical name.
    A tag of the form ``<key>-<variant>`` has only its ``<key>`` part
    swapped, so the ``-<variant>`` tail is kept. Falsy input and tags
    without a matching key are returned unchanged.
    """
    if env:
        for legacy, canonical in ENV_CANON.items():
            if env == legacy:
                return canonical
            if env.startswith(legacy + "-"):
                # Keep everything after the matched key, dash included.
                return canonical + env[len(legacy):]
    return env
 def parse_env_flags(basename):
    """
-    pattern: <model>__<env>[__fa1][__hblt0][__longctx32768]
+    pattern: <model>__<env>[__fa1][__hblt0][__longctx32768][__rpc]
-    Returns (env, fa, context_tag, context_tokens)
+    Returns (env, fa, context_tag, context_tokens, rpc_flag)
    """
    parts = basename.split("__")
    if len(parts) < 2:
-        return None, False, "default", None
+        return None, False, "default", None, False
    env = parts[1]
    fa = False
    context_tag = "default"
    context_tokens = None
    rpc_flag = False
    for raw_suffix in parts[2:]:
        suffix = raw_suffix.lower()
@@ -71,8 +90,10 @@ def parse_env_flags(basename):
                    context_tokens = int(m.group(1))
                except ValueError:
                    context_tokens = None
        elif suffix == "rpc":
            rpc_flag = True
-    return env, fa, context_tag, context_tokens
+    return env, fa, context_tag, context_tokens, rpc_flag
 def env_base_and_variant(env):
    # e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma")
@@ -148,13 +169,16 @@ runs = []
 builds = set()
 envs  = set()
-for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
+for results_dir, is_rpc_source in RESULT_SOURCES:
    glob_pattern = os.path.join(results_dir, "*.log")
    for path in sorted(glob.glob(glob_pattern)):
        base = os.path.basename(path).rsplit(".log", 1)[0]
        if "__" not in base:
            continue
        model_raw, _rest = base.split("__", 1)
-    env, fa_from_name, context_tag, context_tokens = parse_env_flags(base)
+        env, fa_from_name, context_tag, context_tokens, rpc_flag = parse_env_flags(base)
        env = canonicalize_env(env)
        if env:
            envs.add(env)
@@ -250,6 +274,7 @@ for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
                "name_params_b": name_params_b,     # parsed from model name (e.g., 30B -> 30.0)
                "quant": quant,
                "log": path,
                "rpc": bool(is_rpc_source or rpc_flag),
                "build": {"hash": build_hash, "number": build_num} if build_hash else None,
            }
            runs.append(run)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         64.83 ± 0.23 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.69 ± 0.01 |
 build: caca0d55c (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         50.19 ± 0.10 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.73 ± 0.01 |
 build: caca0d55c (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         66.04 ± 0.17 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.73 ± 0.01 |
 build: 86f1f4411 (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         50.78 ± 0.06 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.72 ± 0.00 |
 build: 86f1f4411 (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         62.70 ± 0.13 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.71 ± 0.01 |
 build: f1840a25d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         45.01 ± 0.11 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.73 ± 0.01 |
 build: f1840a25d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         65.83 ± 0.13 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.72 ± 0.02 |
 build: 677be4d78 (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         42.96 ± 0.13 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.73 ± 0.01 |
 build: 677be4d78 (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         61.63 ± 0.11 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.92 ± 0.01 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         36.76 ± 0.06 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.93 ± 0.01 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         66.33 ± 0.03 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          9.04 ± 0.01 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         30.17 ± 0.09 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          9.05 ± 0.01 |
 build: 4db63cdde (7085)
@@ -0,0 +1,21 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:858: Remote RPC server crashed or returned malformed response
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fd18621c565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fd18621c92b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fd18621caaf]
 /usr/local/lib64/libggml-rpc.so.0(+0xa195) [0x7fd1862ca195]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fd186236de3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fd189269650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fd18926b2e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fd1892701bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fd18927100e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x407edc]
 /lib64/libc.so.6(+0x35b5) [0x7fd185bb25b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7fd185bb2668]
 /usr/local/bin/llama-bench() [0x409255]
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         31.42 ± 0.09 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          9.08 ± 0.01 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         63.23 ± 0.18 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.70 ± 0.01 |
 build: b447a9a4b (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         30.12 ± 0.09 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.72 ± 0.01 |
 build: b447a9a4b (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         63.17 ± 0.13 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.72 ± 0.01 |
 build: fa5c85a8b (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |         36.22 ± 0.08 |
 | glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB |   356.79 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |          8.71 ± 0.01 |
 build: fa5c85a8b (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        172.03 ± 0.73 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.02 ± 0.02 |
 build: caca0d55c (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        174.52 ± 1.29 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.94 ± 0.07 |
 build: caca0d55c (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        171.93 ± 1.16 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.06 ± 0.03 |
 build: 86f1f4411 (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        174.45 ± 1.01 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.02 ± 0.03 |
 build: 86f1f4411 (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        170.89 ± 0.37 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.89 ± 0.08 |
 build: f1840a25d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        173.53 ± 1.57 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.92 ± 0.05 |
 build: f1840a25d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        172.01 ± 0.91 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.95 ± 0.04 |
 build: 677be4d78 (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        173.90 ± 0.67 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.95 ± 0.03 |
 build: 677be4d78 (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        142.82 ± 0.78 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.85 ± 0.08 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        144.47 ± 1.03 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.86 ± 0.03 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        143.05 ± 1.08 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.00 ± 0.05 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        145.60 ± 1.03 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.94 ± 0.02 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        144.59 ± 0.50 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.01 ± 0.03 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        146.21 ± 2.31 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         18.05 ± 0.10 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        170.42 ± 0.65 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.88 ± 0.06 |
 build: b447a9a4b (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        174.42 ± 0.52 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.87 ± 0.12 |
 build: b447a9a4b (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        171.34 ± 0.97 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.97 ± 0.01 |
 build: fa5c85a8b (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           pp512 |        173.98 ± 1.06 |
 | minimax-m2 230B.A10B Q6_K      | 180.94 GiB |   228.69 B | ROCm,RPC   |  99 |  1 |    0 |           tg128 |         17.94 ± 0.03 |
 build: fa5c85a8b (7085)
@@ -0,0 +1,18 @@
 ggml_vulkan: Found 1 Vulkan devices:
 ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:690: Remote RPC server crashed or returned malformed response
 /lib64/libggml-base.so.0(+0x3565) [0x7f5d2cbe9565]
 /lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5d2cbe992b]
 /lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5d2cbe9aaf]
 /lib64/libggml-rpc.so.0(+0x452a) [0x7f5d2fb8252a]
 /lib64/libggml-base.so.0(+0x16232) [0x7f5d2cbfc232]
 /lib64/libggml-base.so.0(ggml_backend_alloc_ctx_tensors_from_buft+0xff) [0x7f5d2cbfdf1f]
 /lib64/libllama.so.0(_ZN11llama_model12load_tensorsER18llama_model_loader+0x3a26) [0x7f5d2fdaad06]
 /lib64/libllama.so.0(+0x1cf16) [0x7f5d2fd11f16]
 /lib64/libllama.so.0(llama_model_load_from_file+0xac) [0x7f5d2fd12d7c]
 /usr/sbin/llama-bench() [0x406d85]
 /lib64/libc.so.6(+0x35b5) [0x7f5d2c57f5b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f5d2c57f668]
 /usr/sbin/llama-bench() [0x409255]
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        167.68 ± 0.26 |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         22.67 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,20 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         22.85 ± 0.00 |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f5f7bd95565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5f7bd9592b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5f7bd95aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f5f7f04eeb2]
 /usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f5f7f054034]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f5f7bdac8ce]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f5f7f70a950]
 /usr/local/bin/llama-bench() [0x408242]
 /lib64/libc.so.6(+0x35b5) [0x7f5f7b72b5b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f5f7b72b668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        170.65 ± 0.11 |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         22.54 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         14.57 ± 0.00 |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          1.38 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        171.42 ± 0.59 |
 | glm4moe 106B.A12B Q4_K - Medium |  68.01 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         22.69 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,24 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f2015391565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f201539192b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f2015391aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f2017d0af12]
 /usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f2017d12a66]
 /usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f2017d0ffcf]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20153abde3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20183de650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20183e02e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20183e51bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20183e600e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x407edc]
 /lib64/libc.so.6(+0x35b5) [0x7f2014d275b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f2014d27668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        147.75 ± 0.96 |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.69 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         22.08 ± 0.00 |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          7.05 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        140.67 ± 0.37 |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.59 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         14.60 ± 0.00 |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          1.34 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           pp512 |        151.03 ± 0.71 |
 | glm4moe 106B.A12B Q6_K         |  94.57 GiB |   110.47 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.69 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,28 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f1a5d310565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a5d31092b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a5d310aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f1a5fc89f12]
 /usr/local/lib64/libggml-hip.so.0(+0x28ce0d7) [0x7f1a5fc9b0d7]
 /usr/local/lib64/libggml-hip.so.0(+0x28cccd1) [0x7f1a5fc99cd1]
 /usr/local/lib64/libggml-hip.so.0(+0x28cb92c) [0x7f1a5fc9892c]
 /usr/local/lib64/libggml-hip.so.0(+0x28c645a) [0x7f1a5fc9345a]
 /usr/local/lib64/libggml-hip.so.0(+0x28c2f0a) [0x7f1a5fc8ff0a]
 /usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f1a5fc8efcf]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f1a5d32ade3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1a6035d650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f1a6035f2e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f1a603641bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f1a6036500e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x407edc]
 /lib64/libc.so.6(+0x35b5) [0x7f1a5cca65b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a5cca6668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           pp512 |        101.51 ± 0.07 |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           tg128 |          2.79 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         19.96 ± 0.00 |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          2.46 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           pp512 |        103.07 ± 0.08 |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           tg128 |          2.78 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,20 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         12.71 ± 0.00 |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f295ddb7565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f295ddb792b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f295ddb7aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f2960686fb2]
 /usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f296068c004]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f295ddce8ce]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f2960d2e950]
 /usr/local/bin/llama-bench() [0x408242]
 /lib64/libc.so.6(+0x35b5) [0x7f295d74d5b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f295d74d668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           pp512 |        102.84 ± 0.31 |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |  1 |    0 |           tg128 |          2.79 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         34.86 ± 0.00 |
 | llama 70B Q8_0                 |  75.65 GiB |    70.55 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          2.28 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,24 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f39038cd565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f39038cd92b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f39038cdaaf]
 /usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f3906b86eb2]
 /usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f3906b8eb36]
 /usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f3906b8c09f]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f39038e7de3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f3907243650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f39072452e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f390724a1bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f390724b00e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x407edc]
 /lib64/libc.so.6(+0x35b5) [0x7f39032635b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f3903263668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        152.66 ± 0.00 |
 | llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          9.29 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        274.07 ± 3.25 |
 | llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         15.13 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,20 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q6_K    |  82.35 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        109.44 ± 0.00 |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f3efb9fa565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3efb9fa92b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3efb9faaaf]
 /usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f3efe2c9fb2]
 /usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f3efe2cf004]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3efba118ce]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3efe971950]
 /usr/local/bin/llama-bench() [0x408242]
 /lib64/libc.so.6(+0x35b5) [0x7f3efb3905b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f3efb390668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,24 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f20b4ffb565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f20b4ffb92b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f20b4ffbaaf]
 /usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f20b7974f12]
 /usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f20b797ca66]
 /usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f20b7979fcf]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20b5015de3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20b8048650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20b804a2e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20b804f1bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20b805000e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x407edc]
 /lib64/libc.so.6(+0x35b5) [0x7f20b49915b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f20b4991668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0)
@@ -0,0 +1,24 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fe4591ff565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fe4591ff92b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fe4591ffaaf]
 /usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fe45bb78f12]
 /usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fe45bb80a66]
 /usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fe45bb7dfcf]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fe459219de3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fe45c24c650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fe45c24e2e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fe45c2531bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fe45c25400e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x407edc]
 /lib64/libc.so.6(+0x35b5) [0x7fe458b955b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7fe458b95668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        271.67 ± 1.52 |
 | llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         12.13 ± 0.05 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,20 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        153.04 ± 0.00 |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f0845525565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f084552592b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f0845525aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f08487deeb2]
 /usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f08487e4034]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f084553c8ce]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f0848e9a950]
 /usr/local/bin/llama-bench() [0x408242]
 /lib64/libc.so.6(+0x35b5) [0x7f0844ebb5b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f0844ebb668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        269.91 ± 0.99 |
 | llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         12.11 ± 0.05 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        107.41 ± 0.00 |
 | llama4 17Bx16E (Scout) Q8_0    | 106.65 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          7.67 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,24 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f6a6bb84565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f6a6bb8492b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f6a6bb84aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f6a6e4fdf12]
 /usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f6a6e505a66]
 /usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f6a6e502fcf]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f6a6bb9ede3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f6a6ebd1650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f6a6ebd32e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f6a6ebd81bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f6a6ebd900e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x40816d]
 /lib64/libc.so.6(+0x35b5) [0x7f6a6b51a5b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f6a6b51a668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 failed (exit 0)
@@ -0,0 +1,24 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fa8c83e4565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa8c83e492b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fa8c83e4aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fa8cad5df12]
 /usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fa8cad65a66]
 /usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fa8cad62fcf]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fa8c83fede3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa8cb431650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fa8cb4332e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fa8cb4381bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fa8cb43900e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x408087]
 /lib64/libc.so.6(+0x35b5) [0x7fa8c7d7a5b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7fa8c7d7a668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        312.46 ± 3.80 |
 | llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         19.50 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,24 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f68ae79e565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f68ae79e92b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f68ae79eaaf]
 /usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f68b1a57eb2]
 /usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f68b1a5fb36]
 /usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f68b1a5d09f]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f68ae7b8de3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f68b2114650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f68b21162e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f68b211b1bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f68b211c00e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x407edc]
 /lib64/libc.so.6(+0x35b5) [0x7f68ae1345b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f68ae134668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        313.81 ± 0.68 |
 | llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         19.48 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        109.58 ± 0.00 |
 | llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         10.31 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           pp512 |        315.62 ± 2.64 |
 | llama4 17Bx16E (Scout) Q4_K - Medium |  57.73 GiB |   107.77 B | ROCm       |  99 |  1 |    0 |           tg128 |         19.51 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,24 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7effceeac565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7effceeac92b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7effceeacaaf]
 /usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7effd1825f12]
 /usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7effd182da66]
 /usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7effd182afcf]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7effceec6de3]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7effd1ef9650]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7effd1efb2e2]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7effd1f001bf]
 /usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7effd1f0100e]
 /usr/local/bin/llama-bench() [0x40a3db]
 /usr/local/bin/llama-bench() [0x408087]
 /lib64/libc.so.6(+0x35b5) [0x7effce8425b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7effce842668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           pp512 |        140.40 ± 0.48 |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           tg128 |         15.93 ± 0.23 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         49.58 ± 0.00 |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          9.43 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           pp512 |        142.52 ± 0.12 |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.13 ± 0.05 |
 build: 4db63cdde (7085)
@@ -0,0 +1,29 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         29.46 ± 0.00 |
 :0:rocdevice.cpp            :3588: 50932421658 us:  Callback: Queue 0x7f8e6a000000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29
 Kernel Name: _ZL18flash_attn_ext_vecILi128ELi1EL9ggml_type1ELS0_1ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiiiiiiiiiiiiiliiliiiiil
 VGPU=0x94e06a0 SWq=0x7f8e6cbea000, HWq=0x7f8e6a000000, id=2
 	Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0
 	grid=[32, 68, 64], workgroup=[32, 4, 1]
 	private_seg_size=0, group_seg_size=4352
 	kernel_obj=0x7f8e6a78f180, kernarg_address=0x0x7f738bd49400
 	completion_signal=0x0, correlation_id=0
 	rptr=1368490, wptr=1369554
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f8e79498565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8e7949892b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f8e79498aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f8e7bd67fb2]
 /usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f8e7bd6d004]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f8e794af8ce]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f8e7c40f950]
 /usr/local/bin/llama-bench() [0x408242]
 /lib64/libc.so.6(+0x35b5) [0x7f8e78e2e5b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f8e78e2e668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           pp512 |        140.69 ± 0.99 |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |  1 |    0 |           tg128 |         16.07 ± 0.05 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |         38.47 ± 0.00 |
 | qwen3moe 235B.A22B Q3_K - Medium |  96.99 GiB |   235.09 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          9.20 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        352.23 ± 9.28 |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         27.04 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        192.75 ± 0.00 |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         19.17 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |       345.22 ± 23.61 |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         26.84 ± 0.40 |
 build: 4db63cdde (7085)
@@ -0,0 +1,20 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        135.26 ± 0.00 |
 /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
 /usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f83b9245565]
 /usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f83b924592b]
 /usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f83b9245aaf]
 /usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f83bbb14fb2]
 /usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f83bbb1a004]
 /usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f83b925c8ce]
 /usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f83bc1bc950]
 /usr/local/bin/llama-bench() [0x408242]
 /lib64/libc.so.6(+0x35b5) [0x7f83b8bdb5b5]
 /lib64/libc.so.6(__libc_start_main+0x88) [0x7f83b8bdb668]
 /usr/local/bin/llama-bench() [0x409255]
 ✖ ! [rocm-7alpha-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        360.93 ± 3.44 |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         27.17 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        197.49 ± 0.00 |
 | qwen3moe 30B.A3B BF16          |  56.89 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         19.17 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |       579.57 ± 12.23 |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         58.33 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        202.50 ± 0.00 |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         30.86 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        575.31 ± 5.34 |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         58.66 ± 0.01 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        145.86 ± 0.00 |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          8.72 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        576.33 ± 7.18 |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         58.48 ± 0.01 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        160.69 ± 0.00 |
 | qwen3moe 30B.A3B Q6_K          |  24.53 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         30.79 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        669.29 ± 4.01 |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         71.10 ± 0.01 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        204.78 ± 0.00 |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         33.71 ± 0.00 |
 build: 12bb5c37 (7074)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        666.63 ± 5.54 |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         71.62 ± 0.02 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        148.47 ± 0.00 |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |          8.94 ± 0.00 |
 build: 4db63cdde (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        676.38 ± 1.86 |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         71.44 ± 0.02 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | n_ubatch | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 | pp2048 @ d32768 |        160.70 ± 0.00 |
 | qwen3moe 30B.A3B Q4_K - Medium |  17.35 GiB |    30.53 B | ROCm       |  99 |     2048 |  1 |    0 |   tg32 @ d32768 |         33.64 ± 0.00 |
 build: 4fc43d43d (7085)
@@ -0,0 +1,10 @@
 ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
 ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
 ggml_cuda_init: found 1 ROCm devices:
  Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
 | model                          |       size |     params | backend    | ngl | fa | mmap |            test |                  t/s |
 | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
 | qwen3moe 30B.A3B Q8_0          |  33.51 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           pp512 |        602.73 ± 3.88 |
 | qwen3moe 30B.A3B Q8_0          |  33.51 GiB |    30.53 B | ROCm       |  99 |  1 |    0 |           tg128 |         45.21 ± 0.01 |
 build: 12bb5c37 (7074)
--- a/Show More
+++ b/Show More