Updated benchmarks
This commit is contained in:
@@ -0,0 +1,141 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Directory searched for *.log files when --results-dir is not given.
RESULTS_DIR_DEFAULT = "results"

# Table detection. The original note here says this is the same detection
# logic as a separate extractor script (not visible in this file).
# Header row: a line that begins with "| model |" (case-insensitive).
HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE)
# Separator row: a line that begins with "|", optional whitespace, then dashes.
SEP_RE = re.compile(r"^\|\s*-+")

# Failure signatures, searched anywhere in a log (all case-insensitive).
# Model-load and device-memory-allocation failures.
LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE)
# GPU hang / hardware exception messages.
HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
# Catch-all: "error:", "exit <digits>", "runtime error".
GENERIC_ERR = re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_table(text):
    """Extract the data rows of the pipe-delimited result table(s) in *text*.

    A table starts at a header line matching ``HEADER_RE``.  Every later line
    that starts with ``|`` and is not a separator row (``SEP_RE``) is split on
    ``|`` and mapped onto the lower-cased header names.  Blank lines and lines
    that do not start with ``|`` are skipped, so parsing continues to the end
    of the text.

    Args:
        text: Full contents of one benchmark log.

    Returns:
        A list of dicts, one per data row, keyed by lower-cased column name
        with whitespace-stripped cell text as values.  Empty when no header
        line is found.
    """
    rows = []
    header = None
    col_idx = {}

    for line in text.splitlines():
        if HEADER_RE.search(line):
            header = [c.strip().lower() for c in line.strip().strip("|").split("|")]
            # Rebuild the mapping from scratch: keeping names from an earlier
            # header would report stale columns and could index past the end
            # of a narrower row.
            col_idx = {name: idx for idx, name in enumerate(header)}
            continue

        if header is None:
            # Nothing before the first header line belongs to a table.
            continue

        # Blank lines and separator rows carry no data.
        if not line.strip() or SEP_RE.search(line):
            continue

        if line.startswith("|"):
            parts = [c.strip() for c in line.strip().strip("|").split("|")]
            # Skip truncated rows that have fewer cells than the header.
            if len(parts) < len(header):
                continue
            rows.append({name: parts[idx] for name, idx in col_idx.items()})

    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def detect_error(text):
    """Report whether *text* contains any known failure signature.

    The text is searched with ``LOAD_ERR``, ``HANG_ERR`` and ``GENERIC_ERR``
    in that order; the first hit decides the result.

    Returns:
        True if at least one pattern matches somewhere in *text*, else False.
    """
    failure_patterns = (LOAD_ERR, HANG_ERR, GENERIC_ERR)
    return any(pattern.search(text) is not None for pattern in failure_patterns)
|
||||||
|
|
||||||
|
|
||||||
|
def is_non_transient_vram_issue(text):
    """Report whether *text* shows the Vulkan out-of-memory failure to keep.

    Logs with this kind of Vulkan OOM must NOT be deleted.  Both marker
    strings have to be present somewhere in *text*.

    Returns:
        True only when both markers occur in *text*.
    """
    required_markers = (
        "ggml_vulkan: Device memory allocation of size",
        "Requested buffer size exceeds device buffer size limit",
    )
    return all(marker in text for marker in required_markers)
|
||||||
|
|
||||||
|
|
||||||
|
def is_failed_run(text):
    """Decide whether a log represents a failed benchmark run.

    A log whose table contains at least one row with test name ``pp512`` or
    ``tg128`` (case-insensitive, exact match) is never treated as failed.
    Otherwise the verdict is whatever ``detect_error`` says about the text.

    NOTE(review): rows with other test names do not count as results here
    because the comparison is exact -- confirm that is intended.

    Returns:
        False when a pp512/tg128 row exists, else ``detect_error(text)``.
    """
    result_tests = ("pp512", "tg128")
    for row in parse_table(text):
        if row.get("test", "").lower() in result_tests:
            return False
    return detect_error(text)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: delete logs of transiently failed runs.

    Every ``*.log`` file in the results directory is read and classified:

    * not a failed run (per ``is_failed_run``)  -> ignored;
    * failed with the non-transient Vulkan VRAM issue -> reported and kept;
    * any other failure -> reported and, unless ``--dry-run`` is given,
      deleted.

    Unreadable files and failed deletions are reported on stdout and do not
    stop the run.
    """
    ap = argparse.ArgumentParser(
        description="Delete transient-failure benchmark logs in results/"
    )
    ap.add_argument(
        "--results-dir",
        default=RESULTS_DIR_DEFAULT,
        help="Directory containing *.log files (default: results)",
    )
    ap.add_argument(
        "--dry-run",
        action="store_true",
        help="Only print what would be deleted",
    )
    args = ap.parse_args()

    results_dir = args.results_dir
    pattern = os.path.join(results_dir, "*.log")

    to_delete = []
    skipped_non_transient = []

    for path in sorted(glob.glob(pattern)):
        try:
            # Decode explicitly as UTF-8: the failure patterns include
            # non-ASCII markers, and the locale default encoding would make
            # matching them platform-dependent.  Undecodable bytes are dropped.
            with open(path, encoding="utf-8", errors="ignore") as f:
                text = f.read()
        except OSError as e:
            print(f"Could not read {path}: {e}")
            continue

        if not is_failed_run(text):
            continue

        if is_non_transient_vram_issue(text):
            skipped_non_transient.append(path)
            continue

        to_delete.append(path)

    if not to_delete and not skipped_non_transient:
        print("No failed logs found.")
        return

    if skipped_non_transient:
        print("Keeping logs with non transient VRAM issues:")
        for p in skipped_non_transient:
            print(f" KEEP {p}")

    if to_delete:
        print("Deleting logs with transient failures:")
        for p in to_delete:
            print(f" DELETE {p}")
            if not args.dry_run:
                try:
                    os.remove(p)
                except OSError as e:
                    # Keep going: one undeletable file should not block the rest.
                    print(f" Failed to delete {p}: {e}")
    else:
        print("No logs to delete.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -34,26 +34,45 @@ NAME_B_RE = re.compile(r"(\d+(?:\.\d+)?)B")
|
|||||||
# Shard suffix in filenames
|
# Shard suffix in filenames
|
||||||
SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
|
SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE)
|
||||||
|
|
||||||
|
# Long-context suffix in filenames (e.g., __longctx32768)
|
||||||
|
LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE)
|
||||||
|
|
||||||
# --- Helpers ---------------------------------------------------------------
|
# --- Helpers ---------------------------------------------------------------
|
||||||
|
|
||||||
def clean_model_name(raw):
|
def clean_model_name(raw):
|
||||||
base = SHARD_RE.sub("", raw)
|
base = SHARD_RE.sub("", raw)
|
||||||
return base
|
return base
|
||||||
|
|
||||||
def parse_env_and_fa(basename):
|
def parse_env_flags(basename):
|
||||||
# pattern: <model>__<env>[__fa1][__hblt0]
|
"""
|
||||||
|
pattern: <model>__<env>[__fa1][__hblt0][__longctx32768]
|
||||||
|
Returns (env, fa, context_tag, context_tokens)
|
||||||
|
"""
|
||||||
parts = basename.split("__")
|
parts = basename.split("__")
|
||||||
if len(parts) < 2:
|
if len(parts) < 2:
|
||||||
return None, False
|
return None, False, "default", None
|
||||||
|
|
||||||
env = parts[1]
|
env = parts[1]
|
||||||
# scan any extra suffix segments
|
fa = False
|
||||||
suffixes = {p.lower() for p in parts[2:]}
|
context_tag = "default"
|
||||||
fa = ("fa1" in suffixes)
|
context_tokens = None
|
||||||
if "hblt0" in suffixes:
|
|
||||||
env = f"{env}-hblt0"
|
|
||||||
|
|
||||||
return env, fa
|
for raw_suffix in parts[2:]:
|
||||||
|
suffix = raw_suffix.lower()
|
||||||
|
if suffix == "fa1":
|
||||||
|
fa = True
|
||||||
|
elif suffix == "hblt0":
|
||||||
|
env = f"{env}-hblt0"
|
||||||
|
elif suffix.startswith("longctx"):
|
||||||
|
context_tag = suffix
|
||||||
|
m = LONGCTX_RE.search(suffix)
|
||||||
|
if m:
|
||||||
|
try:
|
||||||
|
context_tokens = int(m.group(1))
|
||||||
|
except ValueError:
|
||||||
|
context_tokens = None
|
||||||
|
|
||||||
|
return env, fa, context_tag, context_tokens
|
||||||
|
|
||||||
def env_base_and_variant(env):
|
def env_base_and_variant(env):
|
||||||
# e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma")
|
# e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma")
|
||||||
@@ -135,7 +154,8 @@ for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
model_raw, _rest = base.split("__", 1)
|
model_raw, _rest = base.split("__", 1)
|
||||||
env, fa_from_name = parse_env_and_fa(base)
|
env, fa_from_name, context_tag, context_tokens = parse_env_flags(base)
|
||||||
|
if env:
|
||||||
envs.add(env)
|
envs.add(env)
|
||||||
|
|
||||||
model_clean = clean_model_name(model_raw)
|
model_clean = clean_model_name(model_raw)
|
||||||
@@ -215,6 +235,8 @@ for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
|
|||||||
"env_base": env_base,
|
"env_base": env_base,
|
||||||
"env_variant": env_variant, # e.g. "rocwmma"
|
"env_variant": env_variant, # e.g. "rocwmma"
|
||||||
"fa": bool(fa_enabled),
|
"fa": bool(fa_enabled),
|
||||||
|
"context": context_tag or "default",
|
||||||
|
"context_tokens": context_tokens,
|
||||||
"test": test, # "pp512" | "tg128" | None (if error)
|
"test": test, # "pp512" | "tg128" | None (if error)
|
||||||
"tps_mean": tps_mean,
|
"tps_mean": tps_mean,
|
||||||
"tps_std": tps_std,
|
"tps_std": tps_std,
|
||||||
|
|||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 135.87 ± 0.06 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.27 ± 0.47 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.46 ± 0.01 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.61 ± 0.00 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 31df4608 (7038)
|
||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.09 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0xe277840) reason :GPU Hang
|
||||||
|
✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 160.81 ± 0.78 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 87.62 ± 0.29 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.41 ± 0.01 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.57 ± 0.00 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 1c398dc9e (7034)
|
||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.99 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x3b2ac4f0) reason :GPU Hang
|
||||||
|
✖ ! [rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 93.53 ± 0.08 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.66 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x2cfb74f0) on address 0x7fb6c65be000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
rocBLAS error: No hipBLASLt solution found
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
|
|
||||||
|
|
||||||
rocBLAS warning: hipBlasLT failed, falling back to tensile.
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.16 ± 0.25 |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.46 ± 0.00 |
|
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.20 ± 0.28 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 135.10 ± 0.15 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.01 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.72 ± 0.00 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.41 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x3c42f5c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 171.53 ± 0.45 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 142.39 ± 0.25 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.08 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.02 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.30 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x1feee5c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
rocBLAS error: No hipBLASLt solution found
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
|
|
||||||
|
|
||||||
rocBLAS warning: hipBlasLT failed, falling back to tensile.
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.30 ± 0.14 |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.83 ± 0.00 |
|
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 136.00 ± 0.15 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 135.50 ± 0.06 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.93 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.19 ± 0.00 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 30.21 ± 0.00 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.28 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 132.96 ± 0.49 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 172.61 ± 0.32 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.99 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.78 ± 0.00 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.91 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x1d99e5c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 77.55 ± 0.60 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.62 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x34ae58c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.50 ± 0.48 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.72 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.23 ± 0.00 |
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x15b288c0) on address 0x7f4ebb38d000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 99.42 ± 0.19 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.83 ± 0.00 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.85 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 172.08 ± 0.59 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.73 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.69 ± 0.00 |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.85 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 67.08 ± 0.15 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 72.38 ± 0.12 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.07 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.73 ± 0.01 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: bca95ca51 (7036)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.40 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x1678a6f0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.71 ± 0.66 |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.53 ± 0.00 |
|
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 148.21 ± 0.25 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 131.85 ± 0.23 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.10 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.71 ± 0.04 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: bca95ca51 (7036)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.15 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x106c26f0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 119.33 ± 0.28 |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.19 ± 0.00 |
|
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 93.03 ± 0.12 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 99.25 ± 0.11 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.00 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.54 ± 0.01 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x3c42b6f0) on address 0x7f0a849aa000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 136.81 ± 0.43 |
|
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.52 ± 0.00 |
|
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 148.95 ± 0.73 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 169.93 ± 0.33 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.71 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 102.61 ± 0.20 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.65 ± 0.00 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.54 ± 0.00 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 218.68 ± 0.54 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 217.91 ± 0.48 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.27 ± 0.01 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.50 ± 0.06 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+5
-5
@@ -1,8 +1,8 @@
|
|||||||
ggml_vulkan: Found 1 Vulkan devices:
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 216.84 ± 0.52 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.82 ± 0.00 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.15 ± 0.01 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.79 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 236.02 ± 2.60 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 235.07 ± 0.58 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.51 ± 0.01 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.84 ± 0.01 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+5
-5
@@ -1,8 +1,8 @@
|
|||||||
ggml_vulkan: Found 1 Vulkan devices:
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 223.39 ± 1.25 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.43 ± 0.00 |
|
||||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.06 ± 0.03 |
|
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.49 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 133.48 ± 0.45 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 92.38 ± 0.37 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.77 ± 0.11 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.64 ± 0.00 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 31df4608 (7038)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 140.68 ± 0.66 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.45 ± 0.00 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.84 ± 0.01 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.86 ± 0.00 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 31df4608 (7038)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 86.50 ± 0.17 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.61 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.06 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x97894f0) reason :GPU Hang
|
||||||
|
✖ ! [rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 74.73 ± 0.27 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.66 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.97 ± 0.00 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.58 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
rocBLAS error: No hipBLASLt solution found
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
|
|
||||||
|
|
||||||
rocBLAS warning: hipBlasLT failed, falling back to tensile.
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 118.41 ± 0.20 |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.75 ± 0.16 |
|
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 128.51 ± 0.51 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 125.43 ± 0.26 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.16 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.48 ± 0.14 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x3a41b5c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 143.55 ± 0.54 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 140.41 ± 0.79 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.17 ± 0.06 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.52 ± 0.05 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.20 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2b2915c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
rocBLAS error: No hipBLASLt solution found
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
|
|
||||||
|
|
||||||
rocBLAS warning: hipBlasLT failed, falling back to tensile.
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.76 ± 0.14 |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.69 ± 0.00 |
|
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 129.77 ± 0.12 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.63 ± 0.70 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.14 ± 0.00 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.44 ± 0.00 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.75 ± 0.00 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.10 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.52 ± 0.53 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 145.79 ± 0.11 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.08 ± 0.21 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.57 ± 0.06 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.65 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x14c455c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 69.31 ± 0.07 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.50 ± 0.09 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.07 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x231278c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 136.65 ± 0.08 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.46 ± 0.16 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.05 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x3b6528c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 94.32 ± 0.20 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.25 ± 0.53 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x3cdc48c0) on address 0x7f1399b6e000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm7.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.72 ± 0.76 |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.54 ± 0.04 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x123888c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 97.09 ± 0.15 |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.89 ± 0.00 |
|
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 69.91 ± 0.44 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 114.56 ± 0.18 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.11 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.58 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: bca95ca51 (7036)
|
||||||
|
|||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 91.95 ± 0.23 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.46 ± 0.00 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.80 ± 0.00 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.65 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: bca95ca51 (7036)
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 128.74 ± 0.30 |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.77 ± 0.20 |
|
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 133.32 ± 0.82 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 159.14 ± 0.64 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.10 ± 0.20 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.44 ± 0.20 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: bca95ca51 (7036)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.46 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2566c6f0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 125.81 ± 0.29 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 96.45 ± 0.26 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.20 ± 0.00 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.51 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
Memory access fault by GPU node-1 (Agent handle: 0x19b4f6f0) on address 0x7f5ea34ff000. Reason: Page not present or supervisor privilege.
|
||||||
|
✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 142.12 ± 0.60 |
|
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.89 ± 0.00 |
|
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
|
||||||
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 139.60 ± 0.48 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.86 ± 0.36 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.10 ± 0.17 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.53 ± 0.06 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.62 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2d1506f0) reason :GPU Hang
|
||||||
|
✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 262.18 ± 1.19 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 261.54 ± 1.01 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.30 ± 0.01 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.42 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+5
-5
@@ -1,8 +1,8 @@
|
|||||||
ggml_vulkan: Found 1 Vulkan devices:
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 260.51 ± 1.03 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 23.19 ± 0.00 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.26 ± 0.01 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.25 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
|
|||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 233.21 ± 6.28 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 233.87 ± 0.08 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.65 ± 0.01 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.74 ± 0.01 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+5
-5
@@ -1,8 +1,8 @@
|
|||||||
ggml_vulkan: Found 1 Vulkan devices:
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 222.31 ± 0.71 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.31 ± 0.00 |
|
||||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.43 ± 0.01 |
|
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.00 ± 0.00 |
|
||||||
|
|
||||||
build: a3cb0474 (6735)
|
build: 1c398dc9e (7034)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.82 ± 0.24 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.85 ± 0.10 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 31df4608 (7038)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.12 ± 0.10 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.88 ± 0.00 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 31df4608 (7038)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.65 ± 0.07 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.76 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.80 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 105.64 ± 0.24 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.13 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.31 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
-15
@@ -1,15 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
hipBLASLt error: Heuristic Fetch Failed!
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
|
|
||||||
|
|
||||||
rocBLAS warning: hipBlasLT failed, falling back to tensile.
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.00 ± 0.05 |
|
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.01 |
|
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.65 ± 0.17 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.56 ± 0.06 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.74 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x1ed645c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.51 ± 0.05 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.23 ± 0.06 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 |
|
||||||
|
|
||||||
build: 11f0af55 (6736)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+9
@@ -0,0 +1,9 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.69 ± 0.00 |
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x147425c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134)
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|
||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
|
||||||
hipBLASLt error: Heuristic Fetch Failed!
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
|
|
||||||
|
|
||||||
rocBLAS warning: hipBlasLT failed, falling back to tensile.
|
|
||||||
This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
|
|
||||||
| model | size | params | backend | ngl | mmap | test | t/s |
|
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
|
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.85 ± 0.06 |
|
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.76 ± 0.00 |
|
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.47 ± 0.08 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 105.28 ± 0.10 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.77 ± 0.00 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+6
@@ -0,0 +1,6 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
HW Exception by GPU node-1 (Agent handle: 0x2dba35c0) reason :GPU Hang
|
||||||
|
✖ ! [rocm6_4_4] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134)
|
||||||
+5
-5
@@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
|||||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
ggml_cuda_init: found 1 ROCm devices:
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.23 ± 0.24 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 104.32 ± 0.29 |
|
||||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 |
|
||||||
|
|
||||||
build: 4807e8f9 (6609)
|
build: 1c398dc9e (7034)
|
||||||
|
|||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.35 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.40 ± 0.00 |
|
||||||
|
|
||||||
|
build: 1c398dc9e (7034)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.80 ± 0.11 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.25 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.92 ± 0.14 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||||
|
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.47 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 |
|
||||||
|
|
||||||
|
build: ee8dd5c65 (7035)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user