From 3710de5d177dc609ff6a2dbd59615964bb2b43da Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 6 Aug 2025 19:14:42 +0100 Subject: [PATCH] Added link to YouTube video and updated benchmarks --- README.md | 25 ++-- benchmark/generate_readme_summary.py | 108 ++++++++++++++++++ ...r-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log | 6 + ...-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log | 6 + ...ir-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 10 ++ ...-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log | 8 ++ ...UD-Q4_K_XL-00001-of-00002__vulkan_radv.log | 8 ++ ...r-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log | 10 ++ ...-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log | 6 + ...ir-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log | 5 + ...-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log | 8 ++ ...UD-Q6_K_XL-00001-of-00003__vulkan_radv.log | 8 ++ .../gemma-3-4b-it-Q3_K_S__rocm6_4_2.log | 10 ++ .../gemma-3-4b-it-Q3_K_S__rocm7_beta.log | 10 ++ .../gemma-3-4b-it-Q3_K_S__rocm7_rc.log | 10 ++ .../gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log | 8 ++ .../gemma-3-4b-it-Q3_K_S__vulkan_radv.log | 8 ++ .../results/gpt-oss-120b-F16__rocm6_4_2.log | 6 + .../results/gpt-oss-120b-F16__rocm7_beta.log | 10 ++ .../results/gpt-oss-120b-F16__rocm7_rc.log | 10 ++ .../gpt-oss-120b-F16__vulkan_amdvlk.log | 8 ++ .../results/gpt-oss-120b-F16__vulkan_radv.log | 8 ++ ...s-120b-mxfp4-00001-of-00003__rocm6_4_2.log | 10 ++ ...-120b-mxfp4-00001-of-00003__rocm7_beta.log | 6 + ...ss-120b-mxfp4-00001-of-00003__rocm7_rc.log | 10 ++ ...0b-mxfp4-00001-of-00003__vulkan_amdvlk.log | 8 ++ ...120b-mxfp4-00001-of-00003__vulkan_radv.log | 8 ++ .../results/gpt-oss-20b-F32__rocm6_4_2.log | 10 ++ .../results/gpt-oss-20b-F32__rocm7_beta.log | 10 ++ .../results/gpt-oss-20b-F32__rocm7_rc.log | 10 ++ .../gpt-oss-20b-F32__vulkan_amdvlk.log | 8 ++ .../results/gpt-oss-20b-F32__vulkan_radv.log | 8 ++ .../results/gpt-oss-20b-mxfp4__rocm6_4_2.log | 10 ++ .../results/gpt-oss-20b-mxfp4__rocm7_beta.log | 10 ++ .../results/gpt-oss-20b-mxfp4__rocm7_rc.log | 10 ++ .../gpt-oss-20b-mxfp4__vulkan_amdvlk.log | 8 ++ .../gpt-oss-20b-mxfp4__vulkan_radv.log | 8 ++ 37 files changed, 421 insertions(+), 9 deletions(-) create mode 100644 benchmark/generate_readme_summary.py create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_beta.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_rc.log create mode 100644 benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log create mode 100644 benchmark/results/gpt-oss-120b-F16__vulkan_radv.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7_beta.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7_rc.log create mode 100644 benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log create mode 100644 benchmark/results/gpt-oss-20b-F32__vulkan_radv.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log diff --git a/README.md b/README.md index 2150d4f..4e236c7 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,10 @@ This project provides pre-built containers (“toolboxes”) for running LLMs on **AMD Ryzen AI Max “Strix Halo”** integrated GPUs. Toolbx is the standard developer container system in Fedora (and now works on Ubuntu, openSUSE, Arch, etc). +## Watch the YouTube Video + +[![Watch the YouTube Video](https://img.youtube.com/vi/wCBLMXgk3No/maxresdefault.jpg)](https://youtu.be/wCBLMXgk3No) + ## Why Toolbx? * Reproducible: never pollute your host system @@ -144,15 +148,18 @@ HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download unsloth/Qwen3-Coder-30B-A3B Below are some results from real runs on Strix Halo hardware of `llama-bench`. For full tables and model-by-model breakdowns (including both prompt processing and token generation speeds), see [docs/benchmarks.md](docs/benchmarks.md). -| Model | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 7.0 Beta | ROCm 7.0 RC | 🏆 Best PP | 🏆 Best TG | -| ------------------------------ | ---------------- | ---------------- | ---------------- | ---------------- | ---------------- | ------------- | -------------- | -| **Gemma3 12B Q8\_0** | 686 pp / 13.9 tg | 509 pp / 13.7 tg | 223 pp / 13.8 tg | 223 pp / 13.8 tg | 223 pp / 13.8 tg | **AMDVLK** | **AMDVLK** | -| **Gemma3 27B BF16** | ❌ Crash | 135 pp / 4.0 tg | 89 pp / 4.0 tg | 82 pp / 4.0 tg | 83 pp / 4.0 tg | **RADV** | **ROCm6.4.2** | -| **Llama-4-Scout 17B Q8\_0** | 241 pp / 12.3 tg | 146 pp / 12.3 tg | ❌ Crash | ❌ Crash | ❌ Crash | **AMDVLK** | **AMDVLK** | -| **Llama-4-Scout 17B Q4\_K XL** | 209 pp / 20.1 tg | 133 pp / 20.0 tg | 133 pp / 17.3 tg | 134 pp / 17.3 tg | ❌ Crash | **AMDVLK** | **AMDVLK** | -| **Qwen3 MoE 30B BF16** | 90 pp / 8.0 tg | 71 pp / 7.3 tg | 158 pp / 22.9 tg | 151 pp / 23.8 tg | 155 pp / 23.1 tg | **ROCm6.4.2** | **ROCm7 Beta** | -| **Qwen3-235B Q3\_K XL** | 99 pp / 15.7 tg | 58 pp / 16.3 tg | 69 pp / 13.5 tg | ❌ Crash | 75 pp / 13.6 tg | **AMDVLK** | **RADV** | - +| Model | Vulkan (AMDVLK) | Vulkan (RADV) | ROCm 6.4.2 | ROCm 7.0 Beta | ROCm 7.0 RC | 🏆 Best PP | 🏆 Best TG | +|---|---|---|---|---|---|---|---| +| **Gemma3 12B Q8_0** | 683 pp / 13.8 tg | 509 pp / 13.7 tg | 223 pp / 13.8 tg | 223 pp / 13.8 tg | 223 pp / 13.8 tg | 🏆 **AMDVLK** | 🏆 **AMDVLK** | +| **Gemma3 27B BF16** | ⚠️ Load Error | 135 pp / 4.0 tg | 89 pp / 4.0 tg | 82 pp / 4.0 tg | 83 pp / 4.0 tg | 🏆 **RADV** | 🏆 **ROCm6.4.2** | +| **Llama-4-Scout 17B Q8_0** | 239 pp / 12.2 tg | 146 pp / 12.3 tg | ⚠️ GPU Hang | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** | +| **Llama-4-Scout 17B Q4_K XL** | 209 pp / 20.1 tg | 133 pp / 20.0 tg | 133 pp / 17.3 tg | 134 pp / 17.4 tg | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **AMDVLK** | +| **Qwen3 30B BF16** | 91 pp / 8.0 tg | 71 pp / 7.3 tg | 158 pp / 22.9 tg | 151 pp / 23.8 tg | 155 pp / 23.1 tg | 🏆 **ROCm6.4.2** | 🏆 **ROCm7 Beta** | +| **Qwen3-235B Q3_K XL** | 100 pp / 15.7 tg | 58 pp / 16.3 tg | 69 pp / 13.5 tg | ⚠️ GPU Hang | 75 pp / 13.6 tg | 🏆 **AMDVLK** | 🏆 **RADV** | +| **GLM-4.5-Air-UD-Q4_K_XL** | 200 pp / 22.8 tg | 128 pp / 22.9 tg | ⚠️ Runtime Error | ⚠️ GPU Hang | 129 pp / 19.6 tg | 🏆 **AMDVLK** | 🏆 **RADV** | +| **GLM-4.5-Air-UD-Q6_K_XL** | 221 pp / 16.5 tg | 127 pp / 16.8 tg | 125 pp / 15.3 tg | ⚠️ GPU Hang | ⚠️ Runtime Error | 🏆 **AMDVLK** | 🏆 **RADV** | +| **gpt-oss-120b-mxfp4** | 486 pp / 48.1 tg | 239 pp / 48.9 tg | 353 pp / 43.6 tg | ⚠️ GPU Hang | 351 pp / 44.6 tg | 🏆 **AMDVLK** | 🏆 **RADV** | +| **gpt-oss-20b-mxfp4** | 1206 pp / 68.9 tg | 647 pp / 69.8 tg | 581 pp / 64.3 tg | 584 pp / 64.4 tg | 584 pp / 64.4 tg | 🏆 **AMDVLK** | 🏆 **RADV** | * **pp = tokens/sec, prompt processing (pre-fill, max speed)** * **tg = tokens/sec, generation (interactive, single token at a time)** diff --git a/benchmark/generate_readme_summary.py b/benchmark/generate_readme_summary.py new file mode 100644 index 0000000..e565b4e --- /dev/null +++ b/benchmark/generate_readme_summary.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +import re, glob, os, argparse + +PP_RE = re.compile(r"\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|\s*pp512\s*\|\s*([\d.]+)\s*±\s*([\d.]+)") +TG_RE = re.compile(r"\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|\s*tg128\s*\|\s*([\d.]+)\s*±\s*([\d.]+)") +LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed", re.IGNORECASE) +HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE) +GEN_ERR = re.compile(r"error:|exit \d+", re.IGNORECASE) + +ENV_ORDER = ["vulkan_amdvlk","vulkan_radv","rocm6_4_2","rocm7_beta","rocm7_rc"] +COL_NAMES = { + "vulkan_amdvlk":"Vulkan (AMDVLK)", + "vulkan_radv":"Vulkan (RADV)", + "rocm6_4_2":"ROCm 6.4.2", + "rocm7_beta":"ROCm 7.0 Beta", + "rocm7_rc":"ROCm 7.0 RC", +} +WINNER = { + "vulkan_amdvlk":"AMDVLK", + "vulkan_radv":"RADV", + "rocm6_4_2":"ROCm6.4.2", + "rocm7_beta":"ROCm7 Beta", + "rocm7_rc":"ROCm7 RC", +} + +DEFAULT_MODELS = [ + ("Gemma3 12B Q8_0", "gemma-3-12b-it-UD-Q8_K_XL"), + ("Gemma3 27B BF16", "gemma-3-27b-it-BF16"), + ("Llama-4-Scout 17B Q8_0", "Llama-4-Scout-17B-16E-Instruct-Q8_0"), + ("Llama-4-Scout 17B Q4_K XL", "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL"), + ("Qwen3 30B BF16", "Qwen3-30B-A3B-BF16"), + ("Qwen3-235B Q3_K XL", "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL"), + ("GLM-4.5-Air-UD-Q4_K_XL", "GLM-4.5-Air-UD-Q4_K_XL"), + ("GLM-4.5-Air-UD-Q6_K_XL", "GLM-4.5-Air-UD-Q6_K_XL"), + ("gpt-oss-120b-mxfp4", "gpt-oss-120b-mxfp4"), + ("gpt-oss-20b-mxfp4", "gpt-oss-20b-mxfp4"), +] + +CLEAN = lambda s: re.sub(r"-000\d+-of-000\d+", "", s) + +def parse_logs(): + data = {} + for p in glob.glob(os.path.join("results","*.log")): + base = os.path.basename(p)[:-4] + if "__" not in base: + continue + model_raw, env = base.split("__", 1) + key = CLEAN(model_raw) + t = open(p, errors="ignore").read() + pp = PP_RE.search(t) + tg = TG_RE.search(t) + et = None + if LOAD_ERR.search(t): et = "load" + elif HANG_ERR.search(t): et = "hang" + elif GEN_ERR.search(t) and not (pp and tg): et = "runtime" + data.setdefault(key, {"pp512": {}, "tg128": {}}) + data[key]["pp512"][env] = {"mean": float(pp.group(1)) if (pp and et is None) else None, + "error": et is not None, "etype": et} + data[key]["tg128"][env] = {"mean": float(tg.group(1)) if (tg and et is None) else None, + "error": et is not None, "etype": et} + return data + +def best(env_data): + vals = {e:d["mean"] for e,d in env_data.items() if (not d["error"]) and d["mean"] is not None} + return max(vals, key=vals.get) if vals else None + +def cell(pp, tg): + if (pp is None) or (tg is None): + return "—" + if pp["error"] or tg["error"]: + m = pp["etype"] or tg["etype"] or "runtime" + return {"load":"⚠️ Load Error","hang":"⚠️ GPU Hang","runtime":"⚠️ Runtime Error"}.get(m, "⚠️ Error") + return f"{int(round(pp['mean']))} pp / {tg['mean']:.1f} tg" + +def find_key(keys, prefix): + for k in keys: + if k.startswith(prefix): + return k + return None + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("models", nargs="*", help="Optional model prefixes to include") + args = ap.parse_args() + data = parse_logs() + want = [(m,m) for m in args.models] if args.models else DEFAULT_MODELS + + header = ["Model"] + [COL_NAMES[e] for e in ENV_ORDER] + ["🏆 Best PP","🏆 Best TG"] + print("| " + " | ".join(header) + " |") + print("|" + "|".join(["---"]*len(header)) + "|") + + for disp, patt in want: + key = find_key(data.keys(), patt) + row = [f"**{disp}**"] + if not key: + row += ["—"]*len(ENV_ORDER) + ["—","—"] + print("| " + " | ".join(row) + " |") + continue + ppd, tgd = data[key]["pp512"], data[key]["tg128"] + for env in ENV_ORDER: + row.append(cell(ppd.get(env), tgd.get(env))) + bpp, btg = best(ppd), best(tgd) + row.append(f"🏆 **{WINNER[bpp]}**" if bpp else "—") + row.append(f"🏆 **{WINNER[btg]}**" if btg else "—") + print("| " + " | ".join(row) + " |") + +if __name__ == "__main__": + main() diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..5e3d8f8 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x7f5e570) on address 0x7f3192c0f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..c3f4dab --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x16bd82e0) reason :GPU Hang +✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..4a8358a --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 129.20 ± 0.38 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.61 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..a5b862a --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 199.54 ± 0.38 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 22.75 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log new file mode 100644 index 0000000..b242732 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 128.00 ± 0.23 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 22.88 ± 0.02 | + +build: 0d883154 (6101) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log new file mode 100644 index 0000000..8519a29 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 124.86 ± 0.54 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.27 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log new file mode 100644 index 0000000..4391361 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2a5da2e0) reason :GPU Hang +✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log new file mode 100644 index 0000000..d44f4f5 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log new file mode 100644 index 0000000..0fb7ad2 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 221.02 ± 0.58 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.47 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log new file mode 100644 index 0000000..9f7f467 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.86 ± 0.40 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.76 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log new file mode 100644 index 0000000..059fbe3 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.02 ± 0.82 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.04 ± 0.03 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log new file mode 100644 index 0000000..67c76bf --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.93 ± 1.29 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.52 ± 0.03 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log new file mode 100644 index 0000000..7fb6f1c --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 728.63 ± 1.23 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.59 ± 0.03 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log new file mode 100644 index 0000000..2cfca97 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1616.55 ± 4.61 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 83.89 ± 0.22 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log new file mode 100644 index 0000000..1e319e2 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1520.07 ± 5.39 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 85.93 ± 0.09 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log new file mode 100644 index 0000000..8f074d4 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x394d3570) reason :GPU Hang +✖ ! [rocm6_4_2] gpt-oss-120b-F16 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log new file mode 100644 index 0000000..035fbc5 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 357.68 ± 1.49 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.70 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log new file mode 100644 index 0000000..6e2747e --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.47 ± 0.55 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.65 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log new file mode 100644 index 0000000..10f4c58 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 449.22 ± 1.12 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.49 ± 0.05 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log new file mode 100644 index 0000000..9d49924 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 230.32 ± 0.72 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.06 ± 0.02 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log new file mode 100644 index 0000000..378cd44 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 352.53 ± 1.06 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 43.56 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log new file mode 100644 index 0000000..784987e --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x299852d0) reason :GPU Hang +✖ ! [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log new file mode 100644 index 0000000..f5968ae --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 351.08 ± 0.86 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 44.63 ± 0.03 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log new file mode 100644 index 0000000..2dc8a85 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 485.98 ± 2.23 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 48.09 ± 0.04 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log new file mode 100644 index 0000000..19e9a00 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 239.16 ± 1.26 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 48.93 ± 0.06 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log new file mode 100644 index 0000000..ffdd70e --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 323.64 ± 4.29 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.64 ± 0.06 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log b/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log new file mode 100644 index 0000000..40fd017 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.15 ± 3.76 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.90 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log new file mode 100644 index 0000000..272ca13 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.27 ± 5.39 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.86 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log new file mode 100644 index 0000000..e8395dc --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 369.86 ± 1.57 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 8.59 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log new file mode 100644 index 0000000..41c0d3f --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 318.82 ± 1.63 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 7.77 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log new file mode 100644 index 0000000..03d8cf8 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 580.67 ± 2.03 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.26 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log new file mode 100644 index 0000000..a4f7d4c --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.04 ± 2.48 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.37 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log new file mode 100644 index 0000000..7584083 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.15 ± 2.11 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.38 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log new file mode 100644 index 0000000..60e3b9f --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1206.08 ± 8.80 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 68.90 ± 0.18 | + +build: 0d883154 (6101) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log new file mode 100644 index 0000000..d9302e5 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 646.77 ± 4.63 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 69.82 ± 0.03 | + +build: 0d883154 (6101)