adding raw benchmark results
This commit is contained in:
@@ -0,0 +1,239 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import re, glob, os, json, time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Directory scanned for "*.log" benchmark logs, and the JSON file written at the end.
RESULTS_DIR = "results"
OUT_JSON = "results.json"

# --- Regexes ---------------------------------------------------------------

# Table header row. Only the leading "| model |" cell is checked, so both
# header shapes (with or without the "fa" column) match.
HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE)
# Markdown separator row under the header, e.g. "| ------ | ---: |".
SEP_RE = re.compile(r"^\|\s*-+")

# Build line, e.g. "build: cd6983d5 (6119)" -> groups (hash, build number).
BUILD_RE = re.compile(r"build:\s*([0-9a-f]{7,})\s*\((\d+)\)", re.IGNORECASE)

# Error classifiers: model load failures, GPU hangs, and generic runtime errors.
LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE)
HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
GENERIC_ERR = re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE)

# "<mean> ± <stddev>" as printed in the t/s column, e.g. "129.88 ± 0.57".
TS_RE = re.compile(r"([\d.]+)\s*±\s*([\d.]+)")

# Quantization tag inside a model name. The suffix after "Q<n>_" may contain
# digits as well as letters so that "Q8_0" / "Q4_1" are recognized alongside
# "Q4_K_XL", and an optional leading "I" keeps "IQ4_XS" intact instead of
# truncating it to "Q4_XS".
QUANT_RE = re.compile(r"(I?Q\d+_[A-Z0-9_]+|BF16|F16|F32|mxfp\d+)", re.IGNORECASE)

# Params like "235.09 B" from the table.
PARAMS_RE = re.compile(r"([\d.]+)\s*B", re.IGNORECASE)
# File size like "96.99 GiB" from the table.
GIB_RE = re.compile(r"([\d.]+)\s*GiB", re.IGNORECASE)

# "30B", "235B" from the model name (case-sensitive: only an upper-case "B").
NAME_B_RE = re.compile(r"(\d+(?:\.\d+)?)B")

# Shard suffix in filenames, e.g. "-00001-of-00002".
SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
|
||||||
|
|
||||||
|
# --- Helpers ---------------------------------------------------------------
|
||||||
|
|
||||||
|
def clean_model_name(raw):
    """Return *raw* with every shard suffix matched by ``SHARD_RE`` removed."""
    return SHARD_RE.sub("", raw)
|
||||||
|
|
||||||
|
def parse_env_and_fa(basename):
    """Split a log basename shaped like ``<model>__<env>[__fa1]``.

    Returns ``(env, fa)``: ``env`` is the second "__"-separated field and
    ``fa`` is True only when the third field equals "fa1" (case-insensitive).
    A basename without any "__" yields ``(None, False)``.
    """
    _model, *tail = basename.split("__")
    if not tail:
        return None, False
    # Only the field directly after the environment is inspected for the tag.
    has_fa_tag = tail[1].lower() == "fa1" if len(tail) > 1 else False
    return tail[0], has_fa_tag
|
||||||
|
|
||||||
|
def env_base_and_variant(env):
    """Split an environment name at its first "-" into ``(base, variant)``.

    For example "rocm6_4_2-rocwmma" gives ("rocm6_4_2", "rocwmma"). A name
    without "-" is returned unchanged with ``None`` as the variant.
    """
    if "-" not in env:
        return env, None
    head, _dash, tail = env.partition("-")
    return head, tail
|
||||||
|
|
||||||
|
def detect_error(text):
    """Classify the failure recorded in a log's text.

    Returns ``(True, kind)`` for the first classifier that matches, tried in
    the order load -> hang -> runtime, or ``(False, None)`` when none match.
    """
    classifiers = (
        (LOAD_ERR, "load"),
        (HANG_ERR, "hang"),
        (GENERIC_ERR, "runtime"),
    )
    for pattern, kind in classifiers:
        if pattern.search(text):
            return True, kind
    return False, None
|
||||||
|
|
||||||
|
def parse_table(text):
    """
    Parse the first markdown-like results table found in *text*.

    Returns a list with one dict per data row. Keys are the lower-cased,
    stripped header cells; values are the stripped cell strings. Because the
    columns are taken from the header actually present, tables with and
    without the 'fa' column are both handled.

    Lines before the header are ignored, separator rows and rows with fewer
    cells than the header are skipped, and parsing stops at the first blank
    line that follows at least one data row. An empty list is returned when
    no header (or no data row) is found.
    """
    rows = []
    header = None
    col_idx = {}

    for line in text.splitlines():
        if HEADER_RE.search(line):
            header = [c.strip().lower() for c in line.strip().strip("|").split("|")]
            # Rebuild the map from scratch so columns of an earlier header
            # (seen before any data row) cannot leak stale indices.
            col_idx = {name: idx for idx, name in enumerate(header)}
            continue

        if header is None:
            # Nothing to parse until a header has been seen.
            continue

        if not line.strip():
            # A blank line after data rows ends the table. This check has to
            # come before the generic skip below, otherwise it is unreachable.
            if rows:
                break
            continue

        if SEP_RE.search(line):
            # "| --- | ---: |" separator under the header.
            continue

        if line.startswith("|"):
            parts = [c.strip() for c in line.strip().strip("|").split("|")]
            # Guard against truncated rows that cannot be indexed by col_idx.
            if len(parts) < len(header):
                continue
            rows.append({name: parts[idx] for name, idx in col_idx.items()})

    return rows
|
||||||
|
|
||||||
|
def coerce_float(m, default=None):
    """Convert *m* to ``float``, returning *default* when it cannot be converted.

    Only conversion failures are absorbed (``TypeError`` for unsupported types
    such as ``None``, ``ValueError`` for malformed text, ``OverflowError`` for
    integers too large for a float); anything else, such as
    ``KeyboardInterrupt``, propagates.
    """
    try:
        return float(m)
    except (TypeError, ValueError, OverflowError):
        return default
|
||||||
|
|
||||||
|
def extract_quant(model_name):
    """Return the upper-cased first ``QUANT_RE`` match in *model_name*, or None."""
    found = QUANT_RE.search(model_name)
    if found is None:
        return None
    return found.group(1).upper()
|
||||||
|
|
||||||
|
def b_from_name(model_name):
    """Return the number from the first "<number>B" token in *model_name* as a float.

    Returns None when ``NAME_B_RE`` finds no such token (e.g. "30B" -> 30.0).
    """
    found = NAME_B_RE.search(model_name)
    if found is None:
        return None
    return coerce_float(found.group(1))
|
||||||
|
|
||||||
|
# --- Main scan -------------------------------------------------------------
|
||||||
|
|
||||||
|
# Accumulators filled while scanning the logs.
runs = []          # one dict per table row, or one per log when it has no table
builds = set()     # distinct (hash, build number) pairs seen across all logs
envs = set()       # distinct environment names taken from the file names

for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
    # The glob only yields "*.log" files, so this strips exactly that suffix.
    base = os.path.splitext(os.path.basename(path))[0]
    if "__" not in base:
        # Not named "<model>__<env>[__fa1]"; nothing can be attributed to it.
        continue

    model_raw = base.split("__", 1)[0]
    env, fa_from_name = parse_env_and_fa(base)
    envs.add(env)

    model_clean = clean_model_name(model_raw)
    # These depend only on the file name, so compute them once per log
    # instead of once per table row.
    quant = extract_quant(model_clean)
    name_params_b = b_from_name(model_clean)

    # Decode explicitly as UTF-8: the tables contain "±", and decoding with a
    # different locale default would garble it so TS_RE could never match.
    with open(path, encoding="utf-8", errors="ignore") as f:
        text = f.read()

    # Build info (the last match in the file wins if there are several).
    build_hash, build_num = None, None
    for m in BUILD_RE.finditer(text):
        build_hash, build_num = m.group(1), m.group(2)
    if build_hash:
        builds.add((build_hash, build_num))

    table_rows = parse_table(text)

    # Only classify the log as an error when no pp512/tg128 result was parsed.
    tests_seen = {r.get("test", "").lower() for r in table_rows}
    error, etype = False, None
    if not ({"pp512", "tg128"} & tests_seen):
        error, etype = detect_error(text)

    # FA flag: prefer the explicit "fa" column of the first row that has one,
    # otherwise fall back to the "__fa1" tag in the file name.
    fa_in_table = None
    for r in table_rows:
        if "fa" in r:
            try:
                fa_in_table = int(r["fa"]) == 1
            except (TypeError, ValueError):
                # Unparseable cell: treat as "not stated" and use the file name.
                fa_in_table = None
            break
    fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name

    # Normalize env base / variant (e.g. "rocwmma").
    env_base, env_variant = env_base_and_variant(env)

    # Emit one run per table row; a log without rows still emits a single
    # placeholder run so that failures show up in the output.
    for r in table_rows or [{}]:
        test = r.get("test", "").lower() if table_rows else None

        tps_mean, tps_std = None, None
        if table_rows:
            m = TS_RE.search(r.get("t/s", ""))
            if m:
                tps_mean = coerce_float(m.group(1))
                tps_std = coerce_float(m.group(2))

        # Numeric columns from the row, when present.
        params_b = None
        file_size_gib = None
        if "params" in r:
            pm = PARAMS_RE.search(r["params"])
            if pm:
                params_b = coerce_float(pm.group(1))
        if "size" in r:
            sm = GIB_RE.search(r["size"])
            if sm:
                file_size_gib = coerce_float(sm.group(1))

        backend = r.get("backend")
        ngl = r.get("ngl")
        mmap = r.get("mmap")

        run = {
            "model": model_raw,
            "model_clean": model_clean,
            "env": env,
            "env_base": env_base,
            "env_variant": env_variant,        # e.g. "rocwmma"
            "fa": bool(fa_enabled),
            "test": test,                      # "pp512" | "tg128" | None (if error)
            "tps_mean": tps_mean,
            "tps_std": tps_std,
            "error": bool(error),
            "error_type": etype,               # "load" | "hang" | "runtime" | None
            "backend": backend,
            "ngl": (int(ngl) if (ngl and ngl.isdigit()) else None),
            "mmap": (int(mmap) if (mmap and mmap.isdigit()) else None),
            "params_b": params_b,              # from table, if available
            "file_size_gib": file_size_gib,    # from table, if available
            "name_params_b": name_params_b,    # parsed from model name (e.g., 30B -> 30.0)
            "quant": quant,
            "log": path,
            "build": {"hash": build_hash, "number": build_num} if build_hash else None,
        }
        runs.append(run)

# Meta
meta = {
    "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    "os_kernel": "Fedora 42 — Linux fedora 6.16.0-264.vanilla.fc42.x86_64 (2025-07-28)",
    "llamacpp_builds": [{"hash": h, "number": n} for (h, n) in sorted(builds)],
    "environments": sorted(envs),
    "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second",
}

out = {"meta": meta, "runs": runs}

# json.dumps escapes non-ASCII by default, so the payload is plain ASCII;
# the explicit encoding just keeps the write independent of the locale.
Path(OUT_JSON).write_text(json.dumps(out, indent=2), encoding="utf-8")
print(f"Wrote {OUT_JSON} with {len(runs)} rows.")
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2edd2a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x432ea90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
Memory access fault by GPU node-1 (Agent handle: 0x7f5e570) on address 0x7f3192c0f000. Reason: Page not present or supervisor privilege.
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 129.88 ± 0.57 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.43 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x834aa90) on address 0x7f10fb96f000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0x16bd82e0) reason :GPU Hang
|
HW Exception by GPU node-1 (Agent handle: 0x100d3790) reason :GPU Hang
|
||||||
✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
|
✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x13829790) on address 0x7fa8ef9a9000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 129.20 ± 0.38 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 130.17 ± 0.38 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.61 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.83 ± 0.00 |
|
||||||
|
|
||||||
build: 0d883154 (6101)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 103.63 ± 0.10 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 20.09 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 199.54 ± 0.38 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 200.76 ± 0.32 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 22.75 ± 0.01 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.78 ± 0.00 |
|
||||||
|
|
||||||
build: 0d883154 (6101)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 201.86 ± 0.27 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 22.83 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 128.00 ± 0.23 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.73 ± 0.23 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 22.88 ± 0.02 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.88 ± 0.02 |
|
||||||
|
|
||||||
build: 0d883154 (6101)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 132.54 ± 0.34 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 23.31 ± 0.01 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 113.62 ± 0.21 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.47 ± 0.04 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2f508a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
|
||||||
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 124.86 ± 0.54 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.82 ± 0.18 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.27 ± 0.00 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.35 ± 0.00 |
|
||||||
|
|
||||||
build: 0d883154 (6101)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x1527fa90) on address 0x7f55d5f6f000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
|
||||||
@@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0x2a5da2e0) reason :GPU Hang
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134)
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 120.54 ± 0.30 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.49 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2a849790) reason :GPU Hang
|
||||||
|
✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
|
||||||
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134)
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.18 ± 0.48 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.49 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 221.02 ± 0.58 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 223.02 ± 0.69 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.47 ± 0.01 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.47 ± 0.01 |
|
||||||
|
|
||||||
build: 0d883154 (6101)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 224.54 ± 0.65 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.49 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.86 ± 0.40 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.36 ± 0.46 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.76 ± 0.00 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.78 ± 0.01 |
|
||||||
|
|
||||||
build: 0d883154 (6101)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 131.78 ± 0.46 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.99 ± 0.01 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x121f0a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x17018a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0x68b7b10) reason :GPU Hang
|
HW Exception by GPU node-1 (Agent handle: 0x11442a90) reason :GPU Hang
|
||||||
✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x64dea90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0x1587b430) reason :GPU Hang
|
HW Exception by GPU node-1 (Agent handle: 0xa636790) reason :GPU Hang
|
||||||
✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x1417b7b0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
✖ ! [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 33.30 ± 0.04 |
|
||||||
|
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.64 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | pp512 | 31.09 ± 0.02 |
|
||||||
|
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | tg128 | 2.65 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -4,5 +4,5 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16:
|
|||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
ggml_vulkan: Device memory allocation of size 2491416576 failed.
|
ggml_vulkan: Device memory allocation of size 2491416576 failed.
|
||||||
ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
|
ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
|
||||||
main: error: failed to load model '/home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf'
|
main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf'
|
||||||
✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 1)
|
✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 1)
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
ggml_vulkan: Device memory allocation of size 2491416576 failed.
|
||||||
|
ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
|
||||||
|
main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf'
|
||||||
|
✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 1)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 99 | 0 | pp512 | 76.48 ± 0.23 |
|
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | pp512 | 78.70 ± 0.20 |
|
||||||
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 99 | 0 | tg128 | 2.65 ± 0.00 |
|
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | tg128 | 2.66 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | pp512 | 81.29 ± 0.14 |
|
||||||
|
| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | tg128 | 2.66 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0xcd80a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x1496da90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.17 ± 0.07 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.32 ± 0.04 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.72 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.73 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.28 ± 0.02 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 2.74 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0xa5e9440) reason :GPU Hang
|
HW Exception by GPU node-1 (Agent handle: 0xfeef7b0) reason :GPU Hang
|
||||||
✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x6d017c0) on address 0x7f967f1a9000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 96.23 ± 0.16 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 98.14 ± 0.14 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.72 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.73 ± 0.00 |
|
||||||
|
|
||||||
build: 9c35706b (6060)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 99.24 ± 0.16 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.72 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 79.71 ± 0.13 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 80.11 ± 0.09 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.72 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.73 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 82.90 ± 0.14 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.73 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x28bb9a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x194fea90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 121.52 ± 0.98 |
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 134.39 ± 0.32 |
|
||||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.28 ± 0.00 |
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.33 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x3b11ea90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
|
||||||
+1
-1
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0x27159430) reason :GPU Hang
|
HW Exception by GPU node-1 (Agent handle: 0x17ad57b0) reason :GPU Hang
|
||||||
✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
|
✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x2314b7b0) on address 0x7f38249a9000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
|
||||||
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 135.36 ± 0.39 |
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.25 ± 0.50 |
|
||||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.29 ± 0.00 |
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.43 ± 0.00 |
|
||||||
|
|
||||||
build: 4cb208c9 (6066)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+5
@@ -0,0 +1,5 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 243.19 ± 1.20 |
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 243.45 ± 1.29 |
|
||||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.28 ± 0.03 |
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.29 ± 0.01 |
|
||||||
|
|
||||||
build: 9c35706b (6060)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 247.48 ± 1.28 |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.03 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 137.97 ± 0.99 |
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 148.25 ± 0.91 |
|
||||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.07 ± 0.05 |
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.21 ± 0.06 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 149.82 ± 0.83 |
|
||||||
|
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.21 ± 0.04 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x9ae6a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x6e9ba90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
|
||||||
@@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0x2b17db10) reason :GPU Hang
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.44 ± 0.76 |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.61 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2fba3a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
|
||||||
+1
-1
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0x1a77430) reason :GPU Hang
|
HW Exception by GPU node-1 (Agent handle: 0x4081f7b0) reason :GPU Hang
|
||||||
✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
|
✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x3c0f27b0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
|
||||||
+5
@@ -0,0 +1,5 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 238.93 ± 2.89 |
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 258.18 ± 1.38 |
|
||||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.25 ± 0.01 |
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.23 ± 0.01 |
|
||||||
|
|
||||||
build: 9c35706b (6060)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 260.16 ± 1.44 |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.09 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 145.86 ± 2.44 |
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 168.63 ± 0.81 |
|
||||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.27 ± 0.00 |
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.26 ± 0.01 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 172.37 ± 0.92 |
|
||||||
|
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.25 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x1a40fa90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2e0ffa90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
+3
-3
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 132.66 ± 0.56 |
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 138.27 ± 0.66 |
|
||||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.29 ± 0.00 |
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.40 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x3a741a90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
|
||||||
+3
-3
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 133.71 ± 0.64 |
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 138.90 ± 0.66 |
|
||||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.35 ± 0.00 |
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.62 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 123.61 ± 0.50 |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.60 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 123.58 ± 0.18 |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.55 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 208.84 ± 1.35 |
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 218.18 ± 0.83 |
|
||||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.06 ± 0.01 |
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 20.04 ± 0.02 |
|
||||||
|
|
||||||
build: 9c35706b (6060)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 221.15 ± 0.74 |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.58 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 133.49 ± 1.83 |
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 152.21 ± 0.66 |
|
||||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 19.99 ± 0.01 |
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 19.98 ± 0.01 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 155.22 ± 1.09 |
|
||||||
|
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.93 ± 0.01 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x153dfa90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2bd2ba90) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
|
||||||
+3
-3
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 69.48 ± 0.09 |
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 74.15 ± 0.18 |
|
||||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.54 ± 0.01 |
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.73 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x25011a90) on address 0x7fdcc1b6f000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
|
||||||
+1
-1
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
HW Exception by GPU node-1 (Agent handle: 0x1a8d440) reason :GPU Hang
|
HW Exception by GPU node-1 (Agent handle: 0x513c7b0) reason :GPU Hang
|
||||||
✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
|
✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x2567c7c0) on address 0x7ee66236f000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
|
||||||
+1
-6
@@ -2,9 +2,4 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 74.69 ± 0.17 |
|
|
||||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.56 ± 0.00 |
|
|
||||||
|
|
||||||
build: 4cb208c9 (6066)
|
|
||||||
|
|||||||
+5
@@ -0,0 +1,5 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 99.94 ± 0.91 |
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 114.49 ± 0.60 |
|
||||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 15.72 ± 0.01 |
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 15.98 ± 0.01 |
|
||||||
|
|
||||||
build: 9c35706b (6060)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 116.07 ± 0.64 |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 15.84 ± 0.01 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
+3
-3
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 58.40 ± 0.21 |
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 64.85 ± 0.38 |
|
||||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 16.29 ± 0.01 |
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 16.58 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 66.76 ± 0.43 |
|
||||||
|
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 16.83 ± 0.01 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.95 ± 2.63 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.53 ± 0.01 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 162.19 ± 3.06 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.03 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 157.74 ± 2.65 |
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.69 ± 2.52 |
|
||||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 22.88 ± 0.01 |
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 23.89 ± 0.01 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 140.32 ± 2.10 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.33 ± 0.00 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
|
|||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
||||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 151.25 ± 3.33 |
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 153.49 ± 1.19 |
|
||||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 23.80 ± 0.09 |
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.52 ± 0.00 |
|
||||||
|
|
||||||
build: 66625a59 (6040)
|
build: cd6983d5 (6119)
|
||||||
|
|||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 138.49 ± 2.52 |
|
||||||
|
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.35 ± 0.01 |
|
||||||
|
|
||||||
|
build: cd6983d5 (6119)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user