updated benchmarks
This commit is contained in:
@@ -0,0 +1 @@
|
|||||||
|
__pycache__
|
||||||
@@ -2,7 +2,10 @@
|
|||||||
import re, glob, os, json, time
|
import re, glob, os, json, time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
RESULTS_DIR = "results"
|
RESULT_SOURCES = [
|
||||||
|
("results", False), # regular single-node runs
|
||||||
|
("results-rpc", True), # distributed RPC runs across two servers
|
||||||
|
]
|
||||||
OUT_JSON = "../docs/results.json"
|
OUT_JSON = "../docs/results.json"
|
||||||
|
|
||||||
# --- Regexes ---------------------------------------------------------------
|
# --- Regexes ---------------------------------------------------------------
|
||||||
@@ -39,23 +42,39 @@ LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE)
|
|||||||
|
|
||||||
# --- Helpers ---------------------------------------------------------------
|
# --- Helpers ---------------------------------------------------------------
|
||||||
|
|
||||||
|
ENV_CANON = {
|
||||||
|
"rocm7_1": "rocm7.1",
|
||||||
|
}
|
||||||
|
|
||||||
def clean_model_name(raw):
|
def clean_model_name(raw):
|
||||||
base = SHARD_RE.sub("", raw)
|
base = SHARD_RE.sub("", raw)
|
||||||
return base
|
return base
|
||||||
|
|
||||||
|
def canonicalize_env(env):
|
||||||
|
if not env:
|
||||||
|
return env
|
||||||
|
for raw, canon in ENV_CANON.items():
|
||||||
|
prefix = f"{raw}-"
|
||||||
|
if env == raw:
|
||||||
|
return canon
|
||||||
|
if env.startswith(prefix):
|
||||||
|
return canon + env[len(raw):]
|
||||||
|
return env
|
||||||
|
|
||||||
def parse_env_flags(basename):
|
def parse_env_flags(basename):
|
||||||
"""
|
"""
|
||||||
pattern: <model>__<env>[__fa1][__hblt0][__longctx32768]
|
pattern: <model>__<env>[__fa1][__hblt0][__longctx32768][__rpc]
|
||||||
Returns (env, fa, context_tag, context_tokens)
|
Returns (env, fa, context_tag, context_tokens, rpc_flag)
|
||||||
"""
|
"""
|
||||||
parts = basename.split("__")
|
parts = basename.split("__")
|
||||||
if len(parts) < 2:
|
if len(parts) < 2:
|
||||||
return None, False, "default", None
|
return None, False, "default", None, False
|
||||||
|
|
||||||
env = parts[1]
|
env = parts[1]
|
||||||
fa = False
|
fa = False
|
||||||
context_tag = "default"
|
context_tag = "default"
|
||||||
context_tokens = None
|
context_tokens = None
|
||||||
|
rpc_flag = False
|
||||||
|
|
||||||
for raw_suffix in parts[2:]:
|
for raw_suffix in parts[2:]:
|
||||||
suffix = raw_suffix.lower()
|
suffix = raw_suffix.lower()
|
||||||
@@ -71,8 +90,10 @@ def parse_env_flags(basename):
|
|||||||
context_tokens = int(m.group(1))
|
context_tokens = int(m.group(1))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
context_tokens = None
|
context_tokens = None
|
||||||
|
elif suffix == "rpc":
|
||||||
|
rpc_flag = True
|
||||||
|
|
||||||
return env, fa, context_tag, context_tokens
|
return env, fa, context_tag, context_tokens, rpc_flag
|
||||||
|
|
||||||
def env_base_and_variant(env):
|
def env_base_and_variant(env):
|
||||||
# e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma")
|
# e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma")
|
||||||
@@ -148,13 +169,16 @@ runs = []
|
|||||||
builds = set()
|
builds = set()
|
||||||
envs = set()
|
envs = set()
|
||||||
|
|
||||||
for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
|
for results_dir, is_rpc_source in RESULT_SOURCES:
|
||||||
|
glob_pattern = os.path.join(results_dir, "*.log")
|
||||||
|
for path in sorted(glob.glob(glob_pattern)):
|
||||||
base = os.path.basename(path).rsplit(".log", 1)[0]
|
base = os.path.basename(path).rsplit(".log", 1)[0]
|
||||||
if "__" not in base:
|
if "__" not in base:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
model_raw, _rest = base.split("__", 1)
|
model_raw, _rest = base.split("__", 1)
|
||||||
env, fa_from_name, context_tag, context_tokens = parse_env_flags(base)
|
env, fa_from_name, context_tag, context_tokens, rpc_flag = parse_env_flags(base)
|
||||||
|
env = canonicalize_env(env)
|
||||||
if env:
|
if env:
|
||||||
envs.add(env)
|
envs.add(env)
|
||||||
|
|
||||||
@@ -250,6 +274,7 @@ for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
|
|||||||
"name_params_b": name_params_b, # parsed from model name (e.g., 30B -> 30.0)
|
"name_params_b": name_params_b, # parsed from model name (e.g., 30B -> 30.0)
|
||||||
"quant": quant,
|
"quant": quant,
|
||||||
"log": path,
|
"log": path,
|
||||||
|
"rpc": bool(is_rpc_source or rpc_flag),
|
||||||
"build": {"hash": build_hash, "number": build_num} if build_hash else None,
|
"build": {"hash": build_hash, "number": build_num} if build_hash else None,
|
||||||
}
|
}
|
||||||
runs.append(run)
|
runs.append(run)
|
||||||
|
|||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 64.83 ± 0.23 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.69 ± 0.01 |
|
||||||
|
|
||||||
|
build: caca0d55c (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 50.19 ± 0.10 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.73 ± 0.01 |
|
||||||
|
|
||||||
|
build: caca0d55c (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 66.04 ± 0.17 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.73 ± 0.01 |
|
||||||
|
|
||||||
|
build: 86f1f4411 (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 50.78 ± 0.06 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.72 ± 0.00 |
|
||||||
|
|
||||||
|
build: 86f1f4411 (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 62.70 ± 0.13 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.71 ± 0.01 |
|
||||||
|
|
||||||
|
build: f1840a25d (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 45.01 ± 0.11 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.73 ± 0.01 |
|
||||||
|
|
||||||
|
build: f1840a25d (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 65.83 ± 0.13 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.72 ± 0.02 |
|
||||||
|
|
||||||
|
build: 677be4d78 (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 42.96 ± 0.13 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.73 ± 0.01 |
|
||||||
|
|
||||||
|
build: 677be4d78 (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 61.63 ± 0.11 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.92 ± 0.01 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 36.76 ± 0.06 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.93 ± 0.01 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 66.33 ± 0.03 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 9.04 ± 0.01 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 30.17 ± 0.09 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 9.05 ± 0.01 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:858: Remote RPC server crashed or returned malformed response
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fd18621c565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fd18621c92b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fd18621caaf]
|
||||||
|
/usr/local/lib64/libggml-rpc.so.0(+0xa195) [0x7fd1862ca195]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fd186236de3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fd189269650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fd18926b2e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fd1892701bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fd18927100e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x407edc]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7fd185bb25b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7fd185bb2668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 31.42 ± 0.09 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 9.08 ± 0.01 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 63.23 ± 0.18 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.70 ± 0.01 |
|
||||||
|
|
||||||
|
build: b447a9a4b (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 30.12 ± 0.09 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.72 ± 0.01 |
|
||||||
|
|
||||||
|
build: b447a9a4b (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 63.17 ± 0.13 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.72 ± 0.01 |
|
||||||
|
|
||||||
|
build: fa5c85a8b (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 36.22 ± 0.08 |
|
||||||
|
| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.71 ± 0.01 |
|
||||||
|
|
||||||
|
build: fa5c85a8b (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 172.03 ± 0.73 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.02 ± 0.02 |
|
||||||
|
|
||||||
|
build: caca0d55c (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 174.52 ± 1.29 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.94 ± 0.07 |
|
||||||
|
|
||||||
|
build: caca0d55c (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 171.93 ± 1.16 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.06 ± 0.03 |
|
||||||
|
|
||||||
|
build: 86f1f4411 (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 174.45 ± 1.01 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.02 ± 0.03 |
|
||||||
|
|
||||||
|
build: 86f1f4411 (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 170.89 ± 0.37 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.89 ± 0.08 |
|
||||||
|
|
||||||
|
build: f1840a25d (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 173.53 ± 1.57 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.92 ± 0.05 |
|
||||||
|
|
||||||
|
build: f1840a25d (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 172.01 ± 0.91 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.95 ± 0.04 |
|
||||||
|
|
||||||
|
build: 677be4d78 (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 173.90 ± 0.67 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.95 ± 0.03 |
|
||||||
|
|
||||||
|
build: 677be4d78 (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 142.82 ± 0.78 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.85 ± 0.08 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 144.47 ± 1.03 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.86 ± 0.03 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 143.05 ± 1.08 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.00 ± 0.05 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 145.60 ± 1.03 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.94 ± 0.02 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 144.59 ± 0.50 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.01 ± 0.03 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 146.21 ± 2.31 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.05 ± 0.10 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 170.42 ± 0.65 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.88 ± 0.06 |
|
||||||
|
|
||||||
|
build: b447a9a4b (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 174.42 ± 0.52 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.87 ± 0.12 |
|
||||||
|
|
||||||
|
build: b447a9a4b (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 171.34 ± 0.97 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.97 ± 0.01 |
|
||||||
|
|
||||||
|
build: fa5c85a8b (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 173.98 ± 1.06 |
|
||||||
|
| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.94 ± 0.03 |
|
||||||
|
|
||||||
|
build: fa5c85a8b (7085)
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:690: Remote RPC server crashed or returned malformed response
|
||||||
|
/lib64/libggml-base.so.0(+0x3565) [0x7f5d2cbe9565]
|
||||||
|
/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5d2cbe992b]
|
||||||
|
/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5d2cbe9aaf]
|
||||||
|
/lib64/libggml-rpc.so.0(+0x452a) [0x7f5d2fb8252a]
|
||||||
|
/lib64/libggml-base.so.0(+0x16232) [0x7f5d2cbfc232]
|
||||||
|
/lib64/libggml-base.so.0(ggml_backend_alloc_ctx_tensors_from_buft+0xff) [0x7f5d2cbfdf1f]
|
||||||
|
/lib64/libllama.so.0(_ZN11llama_model12load_tensorsER18llama_model_loader+0x3a26) [0x7f5d2fdaad06]
|
||||||
|
/lib64/libllama.so.0(+0x1cf16) [0x7f5d2fd11f16]
|
||||||
|
/lib64/libllama.so.0(llama_model_load_from_file+0xac) [0x7f5d2fd12d7c]
|
||||||
|
/usr/sbin/llama-bench() [0x406d85]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f5d2c57f5b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5d2c57f668]
|
||||||
|
/usr/sbin/llama-bench() [0x409255]
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 167.68 ± 0.26 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.67 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+20
@@ -0,0 +1,20 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.85 ± 0.00 |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f5f7bd95565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5f7bd9592b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5f7bd95aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f5f7f04eeb2]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f5f7f054034]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f5f7bdac8ce]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f5f7f70a950]
|
||||||
|
/usr/local/bin/llama-bench() [0x408242]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f5f7b72b5b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5f7b72b668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 170.65 ± 0.11 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.54 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.57 ± 0.00 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.38 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 171.42 ± 0.59 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.69 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f2015391565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f201539192b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f2015391aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f2017d0af12]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f2017d12a66]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f2017d0ffcf]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20153abde3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20183de650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20183e02e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20183e51bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20183e600e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x407edc]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f2014d275b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2014d27668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 147.75 ± 0.96 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.08 ± 0.00 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.67 ± 0.37 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.59 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.60 ± 0.00 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.34 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 151.03 ± 0.71 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+28
@@ -0,0 +1,28 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f1a5d310565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a5d31092b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a5d310aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f1a5fc89f12]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28ce0d7) [0x7f1a5fc9b0d7]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28cccd1) [0x7f1a5fc99cd1]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28cb92c) [0x7f1a5fc9892c]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c645a) [0x7f1a5fc9345a]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c2f0a) [0x7f1a5fc8ff0a]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f1a5fc8efcf]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f1a5d32ade3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1a6035d650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f1a6035f2e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f1a603641bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f1a6036500e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x407edc]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f1a5cca65b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a5cca6668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.51 ± 0.07 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.96 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.07 ± 0.08 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+20
@@ -0,0 +1,20 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.71 ± 0.00 |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f295ddb7565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f295ddb792b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f295ddb7aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f2960686fb2]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f296068c004]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f295ddce8ce]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f2960d2e950]
|
||||||
|
/usr/local/bin/llama-bench() [0x408242]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f295d74d5b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f295d74d668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.84 ± 0.31 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.86 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.28 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f39038cd565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f39038cd92b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f39038cdaaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f3906b86eb2]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f3906b8eb36]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f3906b8c09f]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f39038e7de3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f3907243650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f39072452e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f390724a1bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f390724b00e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x407edc]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f39032635b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3903263668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 152.66 ± 0.00 |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.29 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 274.07 ± 3.25 |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 15.13 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+20
@@ -0,0 +1,20 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.44 ± 0.00 |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f3efb9fa565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3efb9fa92b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3efb9faaaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f3efe2c9fb2]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f3efe2cf004]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3efba118ce]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3efe971950]
|
||||||
|
/usr/local/bin/llama-bench() [0x408242]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f3efb3905b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3efb390668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f20b4ffb565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f20b4ffb92b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f20b4ffbaaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f20b7974f12]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f20b797ca66]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f20b7979fcf]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20b5015de3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20b8048650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20b804a2e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20b804f1bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20b805000e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x407edc]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f20b49915b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f20b4991668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0)
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fe4591ff565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fe4591ff92b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fe4591ffaaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fe45bb78f12]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fe45bb80a66]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fe45bb7dfcf]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fe459219de3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fe45c24c650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fe45c24e2e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fe45c2531bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fe45c25400e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x407edc]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7fe458b955b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7fe458b95668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 271.67 ± 1.52 |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.13 ± 0.05 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+20
@@ -0,0 +1,20 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.04 ± 0.00 |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f0845525565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f084552592b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f0845525aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f08487deeb2]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f08487e4034]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f084553c8ce]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f0848e9a950]
|
||||||
|
/usr/local/bin/llama-bench() [0x408242]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f0844ebb5b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0844ebb668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 269.91 ± 0.99 |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.11 ± 0.05 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 107.41 ± 0.00 |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.67 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f6a6bb84565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f6a6bb8492b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f6a6bb84aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f6a6e4fdf12]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f6a6e505a66]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f6a6e502fcf]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f6a6bb9ede3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f6a6ebd1650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f6a6ebd32e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f6a6ebd81bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f6a6ebd900e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x40816d]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f6a6b51a5b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f6a6b51a668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 failed (exit 0)
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fa8c83e4565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa8c83e492b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fa8c83e4aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fa8cad5df12]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fa8cad65a66]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fa8cad62fcf]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fa8c83fede3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa8cb431650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fa8cb4332e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fa8cb4381bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fa8cb43900e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x408087]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7fa8c7d7a5b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa8c7d7a668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 312.46 ± 3.80 |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.50 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f68ae79e565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f68ae79e92b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f68ae79eaaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f68b1a57eb2]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f68b1a5fb36]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f68b1a5d09f]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f68ae7b8de3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f68b2114650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f68b21162e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f68b211b1bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f68b211c00e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x407edc]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f68ae1345b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f68ae134668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 313.81 ± 0.68 |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.48 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.58 ± 0.00 |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.31 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 315.62 ± 2.64 |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.51 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7effceeac565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7effceeac92b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7effceeacaaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7effd1825f12]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7effd182da66]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7effd182afcf]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7effceec6de3]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7effd1ef9650]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7effd1efb2e2]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7effd1f001bf]
|
||||||
|
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7effd1f0100e]
|
||||||
|
/usr/local/bin/llama-bench() [0x40a3db]
|
||||||
|
/usr/local/bin/llama-bench() [0x408087]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7effce8425b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7effce842668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.40 ± 0.48 |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.93 ± 0.23 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.58 ± 0.00 |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.43 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 142.52 ± 0.12 |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.05 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+29
@@ -0,0 +1,29 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.46 ± 0.00 |
|
||||||
|
:0:rocdevice.cpp :3588: 50932421658 us: Callback: Queue 0x7f8e6a000000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29
|
||||||
|
Kernel Name: _ZL18flash_attn_ext_vecILi128ELi1EL9ggml_type1ELS0_1ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiiiiiiiiiiiiiliiliiiiil
|
||||||
|
VGPU=0x94e06a0 SWq=0x7f8e6cbea000, HWq=0x7f8e6a000000, id=2
|
||||||
|
Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0
|
||||||
|
grid=[32, 68, 64], workgroup=[32, 4, 1]
|
||||||
|
private_seg_size=0, group_seg_size=4352
|
||||||
|
kernel_obj=0x7f8e6a78f180, kernarg_address=0x0x7f738bd49400
|
||||||
|
completion_signal=0x0, correlation_id=0
|
||||||
|
rptr=1368490, wptr=1369554
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f8e79498565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8e7949892b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f8e79498aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f8e7bd67fb2]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f8e7bd6d004]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f8e794af8ce]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f8e7c40f950]
|
||||||
|
/usr/local/bin/llama-bench() [0x408242]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f8e78e2e5b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8e78e2e668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.69 ± 0.99 |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.07 ± 0.05 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.47 ± 0.00 |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.20 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 352.23 ± 9.28 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 192.75 ± 0.00 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.17 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 345.22 ± 23.61 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.84 ± 0.40 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+20
@@ -0,0 +1,20 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 135.26 ± 0.00 |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f83b9245565]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f83b924592b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f83b9245aaf]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f83bbb14fb2]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f83bbb1a004]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f83b925c8ce]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f83bc1bc950]
|
||||||
|
/usr/local/bin/llama-bench() [0x408242]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f83b8bdb5b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f83b8bdb668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409255]
|
||||||
|
✖ ! [rocm-7alpha-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 360.93 ± 3.44 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.17 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 197.49 ± 0.00 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.17 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 579.57 ± 12.23 |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.33 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.50 ± 0.00 |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.86 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 575.31 ± 5.34 |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.66 ± 0.01 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.86 ± 0.00 |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.72 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.33 ± 7.18 |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.48 ± 0.01 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.69 ± 0.00 |
|
||||||
|
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 669.29 ± 4.01 |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.10 ± 0.01 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.78 ± 0.00 |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.71 ± 0.00 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 666.63 ± 5.54 |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.62 ± 0.02 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 148.47 ± 0.00 |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.94 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4db63cdde (7085)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 676.38 ± 1.86 |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.44 ± 0.02 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.70 ± 0.00 |
|
||||||
|
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.64 ± 0.00 |
|
||||||
|
|
||||||
|
build: 4fc43d43d (7085)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 602.73 ± 3.88 |
|
||||||
|
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.21 ± 0.01 |
|
||||||
|
|
||||||
|
build: 12bb5c37 (7074)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user