#!/usr/bin/env python3
"""Collect llama-bench logs from ``results/*.log`` into ``results.json``.

Each log file is named ``<model>__<env>[__fa1].log``.  A log either holds a
markdown-style result table (one row per test, e.g. ``pp512`` / ``tg128``)
followed by a ``build: <hash> (<number>)`` line, or the output of a failed
run.  Every table row becomes one entry in ``runs``; a log without a table
becomes a single entry carrying the error classification.
"""
import glob
import json
import os
import re
import time
from pathlib import Path

RESULTS_DIR = "results"
OUT_JSON = "results.json"

# --- Regexes ---------------------------------------------------------------

# Table headers come in two shapes (with or without "fa" column)
HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE)
SEP_RE = re.compile(r"^\|\s*-+")

# Build line, e.g. "build: cd6983d5 (6119)"
BUILD_RE = re.compile(r"build:\s*([0-9a-f]{7,})\s*\((\d+)\)", re.IGNORECASE)

# Error classifiers, checked in this order: load, hang, generic runtime
LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE)
HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE)
GENERIC_ERR = re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE)

# Extract "mean ± std" from the t/s column
TS_RE = re.compile(r"([\d.]+)\s*±\s*([\d.]+)")

# Quantization from model name.  Digits are allowed after the underscore so
# that Q8_0 / Q4_0 / Q5_1 match, and an optional leading "I" keeps IQ4_XS whole.
QUANT_RE = re.compile(r"(I?Q\d+_[A-Z0-9_]+|BF16|F16|F32|MXFP\d+)", re.IGNORECASE)

# Params like "235.09 B" from the table
PARAMS_RE = re.compile(r"([\d.]+)\s*B", re.IGNORECASE)
# File size like "96.99 GiB" from the table
GIB_RE = re.compile(r"([\d.]+)\s*GiB", re.IGNORECASE)

# "30B", "235B" from model name
NAME_B_RE = re.compile(r"(\d+(?:\.\d+)?)B")

# Shard suffix in filenames, e.g. "-00001-of-00002"
SHARD_RE = re.compile(r"-\d{5}-of-\d{5}", re.IGNORECASE)

# --- Helpers ---------------------------------------------------------------


def clean_model_name(raw):
    """Return *raw* with any ``-NNNNN-of-NNNNN`` shard suffix removed."""
    return SHARD_RE.sub("", raw)


def parse_env_and_fa(basename):
    """Split a log basename of the form ``<model>__<env>[__fa1]``.

    Returns ``(env, fa)`` where ``fa`` is True only when the third
    ``__``-separated part is ``fa1`` (case-insensitive).  Returns
    ``(None, False)`` when the name contains no ``__`` separator.
    """
    parts = basename.split("__")
    if len(parts) < 2:
        return None, False
    fa = len(parts) > 2 and parts[2].lower() == "fa1"
    return parts[1], fa


def env_base_and_variant(env):
    """Split an environment name at its first ``-``.

    e.g. ``"rocm6_4_2-rocwmma"`` -> ``("rocm6_4_2", "rocwmma")``; a name
    without ``-`` yields ``(env, None)``.
    """
    if "-" in env:
        base, variant = env.split("-", 1)
        return base, variant
    return env, None


def detect_error(text):
    """Classify a failed log.

    Returns ``(True, "load" | "hang" | "runtime")`` for the first classifier
    that matches, or ``(False, None)`` when none does.
    """
    if LOAD_ERR.search(text):
        return True, "load"
    if HANG_ERR.search(text):
        return True, "hang"
    if GENERIC_ERR.search(text):
        return True, "runtime"
    return False, None


def parse_table(text):
    """Parse the first markdown-like result table found in *text*.

    Returns a list of dicts, one per data row, keyed by the lower-cased
    header names (so the optional ``fa`` column is simply present or absent).
    Rows with fewer cells than the header are skipped.  Parsing stops at the
    first blank line that follows at least one data row, so stray ``|`` lines
    later in the log are not mistaken for results.
    """
    rows = []
    header = None
    col_idx = {}

    for line in text.splitlines():
        stripped = line.strip()
        if HEADER_RE.search(line):
            header = [c.strip().lower() for c in stripped.strip("|").split("|")]
            # Rebuild the index map from scratch so columns from an earlier
            # header can never leak into this one.
            col_idx = {name: idx for idx, name in enumerate(header)}
            continue
        if header is None:
            continue
        if not stripped:
            if rows:
                # Blank line after data rows: the table block is over.
                break
            continue
        if SEP_RE.search(line):
            continue
        if line.startswith("|"):
            parts = [c.strip() for c in stripped.strip("|").split("|")]
            if len(parts) < len(header):
                continue
            rows.append({name: parts[idx] for name, idx in col_idx.items()})

    return rows


def coerce_float(m, default=None):
    """Return ``float(m)``, or *default* when *m* cannot be converted."""
    try:
        return float(m)
    except (TypeError, ValueError, OverflowError):
        return default


def extract_quant(model_name):
    """Return the upper-cased quantization tag in *model_name*, or None."""
    m = QUANT_RE.search(model_name)
    return m.group(1).upper() if m else None


def b_from_name(model_name):
    """Return the first ``<number>B`` in *model_name* as a float, or None."""
    m = NAME_B_RE.search(model_name)
    return coerce_float(m.group(1)) if m else None


def _int_or_none(value):
    """Return ``int(value)`` for a non-empty all-digit string, else None."""
    return int(value) if (value and value.isdigit()) else None


def parse_log(path, text):
    """Turn the contents of one log file into run records.

    Args:
        path: Path of the log; its basename supplies model, env and the
            ``__fa1`` flag, and it is stored verbatim in each record.
        text: Full text of the log.

    Returns:
        ``(runs, env, build)``.  ``runs`` holds one dict per table row, or a
        single dict when the log has no table.  ``build`` is the
        ``(hash, number)`` pair of the last ``build:`` line, or None.
        A basename without ``__`` yields ``([], None, None)``.
    """
    base = os.path.basename(path).rsplit(".log", 1)[0]
    if "__" not in base:
        return [], None, None

    model_raw = base.split("__", 1)[0]
    env, fa_from_name = parse_env_and_fa(base)
    model_clean = clean_model_name(model_raw)

    # Build info: take the last match in the file if there are several.
    build = None
    for m in BUILD_RE.finditer(text):
        build = (m.group(1), m.group(2))

    table_rows = parse_table(text)

    # Only classify the log as an error when no pp512/tg128 result was found.
    has_perf = any(
        r.get("test", "").lower() in ("pp512", "tg128") for r in table_rows
    )
    error, etype = (False, None) if has_perf else detect_error(text)

    # FA flag: prefer the explicit "fa" column of the first row that has one,
    # otherwise fall back to the "__fa1" filename suffix.
    fa_in_table = None
    for r in table_rows:
        if "fa" in r:
            try:
                fa_in_table = int(r["fa"]) == 1
            except ValueError:
                fa_in_table = None
            break
    fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name

    env_base, env_variant = env_base_and_variant(env)

    # These depend only on the file name, not on the row.
    quant = extract_quant(model_clean)
    name_params_b = b_from_name(model_clean)

    runs = []
    # A log without a table still yields one (error) record.
    for r in table_rows or [{}]:
        test = r.get("test", "").lower() if table_rows else None

        tps_mean, tps_std = None, None
        if table_rows:
            m = TS_RE.search(r.get("t/s", ""))
            if m:
                tps_mean = coerce_float(m.group(1))
                tps_std = coerce_float(m.group(2))

        params_b = None
        file_size_gib = None
        if "params" in r:
            pm = PARAMS_RE.search(r["params"])
            if pm:
                params_b = coerce_float(pm.group(1))
        if "size" in r:
            sm = GIB_RE.search(r["size"])
            if sm:
                file_size_gib = coerce_float(sm.group(1))

        runs.append({
            "model": model_raw,
            "model_clean": model_clean,
            "env": env,
            "env_base": env_base,
            "env_variant": env_variant,      # e.g. "rocwmma"
            "fa": bool(fa_enabled),
            "test": test,                    # "pp512" | "tg128" | None (if error)
            "tps_mean": tps_mean,
            "tps_std": tps_std,
            "error": bool(error),
            "error_type": etype,             # "load" | "hang" | "runtime" | None
            "backend": r.get("backend"),
            "ngl": _int_or_none(r.get("ngl")),
            "mmap": _int_or_none(r.get("mmap")),
            "params_b": params_b,            # from table, if available
            "file_size_gib": file_size_gib,  # from table, if available
            "name_params_b": name_params_b,  # parsed from model name (e.g., 30B -> 30.0)
            "quant": quant,
            "log": path,
            "build": {"hash": build[0], "number": build[1]} if build else None,
        })

    return runs, env, build


def main(results_dir=RESULTS_DIR, out_json=OUT_JSON):
    """Scan ``results_dir/*.log`` and write the aggregated JSON to *out_json*."""
    runs = []
    builds = set()
    envs = set()

    for path in sorted(glob.glob(os.path.join(results_dir, "*.log"))):
        if "__" not in os.path.basename(path).rsplit(".log", 1)[0]:
            continue

        # The logs contain non-ASCII characters such as "±"; decode them as
        # UTF-8 explicitly so the t/s regex works regardless of the locale.
        with open(path, encoding="utf-8", errors="ignore") as f:
            text = f.read()

        file_runs, env, build = parse_log(path, text)
        envs.add(env)
        if build:
            builds.add(build)
        runs.extend(file_runs)

    meta = {
        "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        # NOTE: hard-coded host description, not detected at runtime.
        "os_kernel": "Fedora 42 — Linux fedora 6.16.0-264.vanilla.fc42.x86_64 (2025-07-28)",
        "llamacpp_builds": [{"hash": h, "number": n} for (h, n) in sorted(builds)],
        "environments": sorted(envs),
        "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second",
    }

    out = {"meta": meta, "runs": runs}

    Path(out_json).write_text(json.dumps(out, indent=2), encoding="utf-8")
    print(f"Wrote {out_json} with {len(runs)} rows.")


if __name__ == "__main__":
    main()
VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2edd2a90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..d044208 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x432ea90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log index 5e3d8f8..e1a550e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x7f5e570) on address 0x7f3192c0f000. Reason: Page not present or supervisor privilege. -✖ ! 
[rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 129.88 ± 0.57 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.43 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log new file mode 100644 index 0000000..268535b --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x834aa90) on address 0x7f10fb96f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log index c3f4dab..52deb8e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x16bd82e0) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x100d3790) reason :GPU Hang ✖ ! 
[rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log new file mode 100644 index 0000000..8039123 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x13829790) on address 0x7fa8ef9a9000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log index 4a8358a..fcf0f01 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 129.20 ± 0.38 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.61 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 130.17 ± 0.38 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.83 ± 0.00 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git 
a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..94079a7 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 103.63 ± 0.10 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 20.09 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log index a5b862a..4ef718e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 199.54 ± 0.38 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 22.75 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | 
pp512 | 200.76 ± 0.32 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.78 ± 0.00 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..4bbf6de --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 201.86 ± 0.27 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 22.83 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log index b242732..90347e7 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 
| pp512 | 128.00 ± 0.23 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 22.88 ± 0.02 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.73 ± 0.23 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.88 ± 0.02 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..cf98168 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 132.54 ± 0.34 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 23.31 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..5dc10c6 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | 
---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 113.62 ± 0.21 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.47 ± 0.04 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..a0c808c --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2f508a90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log index 8519a29..d1de7a1 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 124.86 ± 0.54 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.27 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.82 ± 0.18 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 
999 | 0 | tg128 | 15.35 ± 0.00 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log new file mode 100644 index 0000000..5ed10e0 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x1527fa90) on address 0x7f55d5f6f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log index 4391361..273166e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2a5da2e0) reason :GPU Hang -✖ ! 
[rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 120.54 ± 0.30 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.49 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log new file mode 100644 index 0000000..c23fe13 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2a849790) reason :GPU Hang +✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log index d44f4f5..5fbf5b3 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! 
[rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.18 ± 0.48 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.49 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log new file mode 100644 index 0000000..28ae734 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log index 0fb7ad2..4247170 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 221.02 ± 0.58 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | 
tg128 | 16.47 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 223.02 ± 0.69 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.47 ± 0.01 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..e3bc753 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 224.54 ± 0.65 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.49 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log index 9f7f467..5f0ace5 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| 
glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.86 ± 0.40 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.76 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.36 ± 0.46 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.78 ± 0.01 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..1973a52 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 131.78 ± 0.46 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.99 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..135d108 --- /dev/null +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x121f0a90) reason :GPU Hang +✖ ! 
[rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..29b2095 --- /dev/null +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x17018a90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log index 3cb770a..08dae7b 100644 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x68b7b10) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x11442a90) reason :GPU Hang ✖ ! 
[rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log new file mode 100644 index 0000000..1849a77 --- /dev/null +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x64dea90) reason :GPU Hang +✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log index d0101aa..e01b520 100644 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1587b430) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0xa636790) reason :GPU Hang ✖ ! 
[rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log new file mode 100644 index 0000000..2f2342b --- /dev/null +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x1417b7b0) reason :GPU Hang +✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log index d77c334..c479337 100644 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log @@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -✖ ! 
[rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 33.30 ± 0.04 | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.64 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..7b0ea20 --- /dev/null +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | pp512 | 31.09 ± 0.02 | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | tg128 | 2.65 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log index 7d3b718..4581b23 100644 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -4,5 +4,5 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | ggml_vulkan: Device memory allocation of 
size 2491416576 failed. ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf' +main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf' ✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 1) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..8835330 --- /dev/null +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +ggml_vulkan: Device memory allocation of size 2491416576 failed. +ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory +main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf' +✖ ! 
[vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 1) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log index f9cf4af..c6c72c5 100644 --- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 99 | 0 | pp512 | 76.48 ± 0.23 | -| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 99 | 0 | tg128 | 2.65 ± 0.00 | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | pp512 | 78.70 ± 0.20 | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | tg128 | 2.66 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..ea12120 --- /dev/null +++ b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen2 70B Q8_0 | 78.21 GiB | 
72.71 B | Vulkan | 999 | 1 | 0 | pp512 | 81.29 ± 0.14 | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | tg128 | 2.66 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..a418f5b --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0xcd80a90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..3de552f --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x1496da90) reason :GPU Hang +✖ ! 
[rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log index 1d1603d..409a36b 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.17 ± 0.07 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.72 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.32 ± 0.04 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.73 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log new file mode 100644 index 0000000..952dddb --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 
31.28 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 2.74 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log index 0d64f23..2acc073 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xa5e9440) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0xfeef7b0) reason :GPU Hang ✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log new file mode 100644 index 0000000..7a57ad3 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x6d017c0) on address 0x7f967f1a9000. Reason: Page not present or supervisor privilege. +✖ ! 
[rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..c55bab8 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log index 747dc38..eb3efec 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 96.23 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.72 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 98.14 ± 0.14 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.73 ± 0.00 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git 
a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..966e109 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 99.24 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.72 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log index 22a6a30..80c3a0e 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 79.71 ± 0.13 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.72 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 
0 | pp512 | 80.11 ± 0.09 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.73 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..5826f3e --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 82.90 ± 0.14 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.73 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..40f418b --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x28bb9a90) reason :GPU Hang +✖ ! 
[rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..a94cdd6 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x194fea90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log index 33fcb65..f7132fb 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 121.52 ± 0.98 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.28 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 134.39 ± 0.32 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.33 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff 
--git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log new file mode 100644 index 0000000..53feea1 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x3b11ea90) reason :GPU Hang +✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log index 535626f..6d3b4ea 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x27159430) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x17ad57b0) reason :GPU Hang ✖ ! 
[rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log new file mode 100644 index 0000000..107b01e --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x2314b7b0) on address 0x7f38249a9000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log index 995bcbd..ccf7ac1 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 135.36 ± 0.39 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.29 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.25 ± 0.50 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.43 ± 0.00 | -build: 4cb208c9 (6066) +build: 
cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..8df0b3e --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log index f1d30fc..dc80b9d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 243.19 ± 1.20 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.28 ± 0.03 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 243.45 ± 1.29 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.29 ± 0.01 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git 
a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..08242f2 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 247.48 ± 1.28 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.03 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log index 89e18de..ba7a655 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 137.97 ± 0.99 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 
| 15.07 ± 0.05 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 148.25 ± 0.91 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.21 ± 0.06 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..14f12dd --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 149.82 ± 0.83 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.21 ± 0.04 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..2faeaa3 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x9ae6a90) reason :GPU Hang +✖ ! 
[rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..6ff4745 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x6e9ba90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log index b7b6ab3..8678b7b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2b17db10) reason :GPU Hang -✖ ! 
[rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.44 ± 0.76 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.61 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log new file mode 100644 index 0000000..099d9b2 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2fba3a90) reason :GPU Hang +✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log index 4981e9d..c768b8e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1a77430) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x4081f7b0) reason :GPU Hang ✖ ! 
[rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log new file mode 100644 index 0000000..98c472e --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x3c0f27b0) reason :GPU Hang +✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log new file mode 100644 index 0000000..3ccfa82 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! 
[rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log index cda78f6..3bdeae7 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 238.93 ± 2.89 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.25 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 258.18 ± 1.38 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.23 ± 0.01 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..2060565 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | 
test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 260.16 ± 1.44 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.09 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log index 6a5f1fb..d9b6ebc 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 145.86 ± 2.44 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.27 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 168.63 ± 0.81 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.26 ± 0.01 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..579e532 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 
+1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 172.37 ± 0.92 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.25 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..070646e --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x1a40fa90) reason :GPU Hang +✖ ! 
[rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..3fa46c3 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2e0ffa90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log index 5242c84..3ec496d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 132.66 ± 0.56 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.29 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 138.27 ± 0.66 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 
999 | 0 | tg128 | 17.40 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log new file mode 100644 index 0000000..9d0c061 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x3a741a90) reason :GPU Hang +✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log index c8275dd..fb93137 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 133.71 ± 0.64 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.35 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 138.90 ± 0.66 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.62 ± 0.00 | -build: 
66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log new file mode 100644 index 0000000..ee0d484 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 123.61 ± 0.50 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.60 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..bde171a --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 123.58 ± 0.18 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 
57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.55 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log index 72a362e..75ac351 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 208.84 ± 1.35 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.06 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 218.18 ± 0.83 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 20.04 ± 0.02 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..a745a31 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | 
matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 221.15 ± 0.74 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.58 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log index 71adfea..4b78701 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 133.49 ± 1.83 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 19.99 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 152.21 ± 0.66 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 19.98 ± 0.01 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log 
new file mode 100644 index 0000000..ee535dc --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 155.22 ± 1.09 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.93 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..aa6dfe3 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x153dfa90) reason :GPU Hang +✖ ! 
[rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..e2df164 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2bd2ba90) reason :GPU Hang +✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log index c21206d..1bc098e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 69.48 ± 0.09 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.54 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 74.15 ± 0.18 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.73 ± 
0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log new file mode 100644 index 0000000..40b3223 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x25011a90) on address 0x7fdcc1b6f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log index 6cb77a4..b5a6749 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1a8d440) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x513c7b0) reason :GPU Hang ✖ ! 
[rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log new file mode 100644 index 0000000..7826050 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x2567c7c0) on address 0x7ee66236f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log index 2421a1b..dbd9c47 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log @@ -2,9 +2,4 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 74.69 ± 0.17 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.56 ± 0.00 | - -build: 4cb208c9 (6066) +✖ ! 
[rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log new file mode 100644 index 0000000..57b950a --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log index dba1565..af5c138 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 99.94 ± 0.91 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 15.72 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 114.49 ± 0.60 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 15.98 
± 0.01 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..19e5e37 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 116.07 ± 0.64 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 15.84 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log index 11f7672..2aefda4 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | 
Vulkan | 99 | 0 | pp512 | 58.40 ± 0.21 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 16.29 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 64.85 ± 0.38 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 16.58 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..c0359f0 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 66.76 ± 0.43 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 16.83 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..3c0cef6 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap 
| test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.95 ± 2.63 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.53 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..af7dc3f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 162.19 ± 3.06 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.03 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log index e10adbb..03365ca 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 
99 | 0 | pp512 | 157.74 ± 2.65 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 22.88 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.69 ± 2.52 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 23.89 ± 0.01 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log new file mode 100644 index 0000000..86ac559 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 140.32 ± 2.10 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.33 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log index bb3fa29..ea26bd0 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 151.25 
± 3.33 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 23.80 ± 0.09 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 153.49 ± 1.19 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.52 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log new file mode 100644 index 0000000..bb2103f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 138.49 ± 2.52 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.35 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log index 47cba1d..e446a9b 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 154.95 ± 1.58 | -| qwen3moe 30B.A3B 
BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 23.08 ± 0.08 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 152.26 ± 2.41 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.55 ± 0.00 | -build: 4cb208c9 (6066) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..d73c640 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 137.52 ± 1.75 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.33 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log index c75a868..1687c7e 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 
| 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 90.91 ± 0.35 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.96 ± 0.03 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 107.48 ± 0.16 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.04 ± 0.01 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..a9a752b --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.64 ± 0.13 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.96 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log index ef72d92..ccca043 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | 
---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 71.16 ± 0.92 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.33 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 85.97 ± 0.12 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.38 ± 0.01 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..48148ef --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 87.05 ± 0.10 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.40 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..dc6f1a9 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | 
params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.77 ± 0.97 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.31 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..60992bd --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 412.35 ± 1.06 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 48.26 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log new file mode 100644 index 0000000..bd9bc1c --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: 
| ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.72 ± 2.63 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.19 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log new file mode 100644 index 0000000..2a04531 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 301.29 ± 0.54 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.58 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log new file mode 100644 index 0000000..a3987ef --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 390.07 ± 0.40 | +| 
qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.19 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log new file mode 100644 index 0000000..a9ca9ef --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 300.60 ± 2.31 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.78 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log new file mode 100644 index 0000000..8ff09a7 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.99 ± 1.86 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.31 ± 0.01 | + +build: cd6983d5 (6119) diff --git 
a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log new file mode 100644 index 0000000..db6f9b0 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 302.87 ± 0.88 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.90 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log new file mode 100644 index 0000000..51b45f0 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 736.95 ± 3.72 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 56.89 ± 0.26 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log 
b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..3f2a08e --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 727.71 ± 2.81 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 53.34 ± 0.31 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log new file mode 100644 index 0000000..5140ff3 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 395.16 ± 1.55 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 58.95 ± 0.45 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log new file mode 100644 
index 0000000..6bbc4f2 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 405.61 ± 1.85 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 58.06 ± 0.28 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..6625574 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 150.50 ± 1.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.55 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..ded0220 --- /dev/null +++ 
b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 154.09 ± 1.98 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.02 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log index 445b37e..222959d 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 150.53 ± 1.83 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 22.13 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 150.34 ± 1.74 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.14 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log 
b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log new file mode 100644 index 0000000..207a2a1 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 134.40 ± 1.47 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.32 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log index 1204c49..cc48f94 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 147.31 ± 2.22 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.12 ± 0.06 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 146.55 ± 1.77 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.54 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git 
a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log new file mode 100644 index 0000000..285bed2 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x2bd8a7b0) on address 0x7fe0b0d6f000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log index b366cae..29fa537 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 144.59 ± 3.08 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 23.48 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 145.91 ± 1.76 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.57 ± 0.01 | -build: 4cb208c9 (6066) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log 
b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..1416318 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log index 33fe404..65ecb3e 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 90.38 ± 0.57 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.00 ± 0.03 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 106.99 ± 0.10 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.03 ± 0.01 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 
0000000..2b69233 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.10 ± 0.08 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.98 ± 0.02 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log index cd83c90..3a2d167 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 71.53 ± 1.06 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.34 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 85.50 ± 0.06 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.42 ± 0.01 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git 
a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..9132fa2 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 86.52 ± 0.06 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.40 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..9f1e992 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 223.38 ± 0.29 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.86 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log 
b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..348f5ed --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 229.77 ± 0.32 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.59 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log index 21a2b99..5872035 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 223.36 ± 0.23 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.81 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.86 ± 0.11 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.85 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log new file mode 100644 index 
0000000..de6f8de --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 202.13 ± 0.24 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.58 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log index fc2cc5b..6493650 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.95 ± 0.15 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.80 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.67 ± 0.37 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.88 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log new file mode 100644 index 0000000..a535d64 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log @@ -0,0 
+1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 203.12 ± 0.35 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.60 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log index acf4970..f1ec100 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.99 ± 0.24 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.81 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.49 ± 0.29 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.86 ± 0.00 | -build: 4cb208c9 (6066) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log new file mode 100644 index 0000000..f4493e0 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm 
devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 201.47 ± 0.21 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.61 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log index 2ba5269..5ac352f 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 683.07 ± 1.03 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 13.84 ± 0.02 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 676.94 ± 0.85 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 13.99 ± 0.01 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..b3193bd --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source 
driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 371.17 ± 0.24 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.30 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log index 5d31829..b620676 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 508.55 ± 0.90 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 13.65 ± 0.02 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 503.27 ± 1.09 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 13.76 ± 0.02 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..5e9431a --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics 
(RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 495.99 ± 2.36 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 13.61 ± 0.03 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..96d541d --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 92.52 ± 0.44 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.05 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..a5e826d --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size 
| params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 94.54 ± 0.52 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.03 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log index bbf9e04..c646996 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 88.73 ± 0.50 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.02 ± 0.00 | - -build: 66625a59 (6040) +HW Exception by GPU node-1 (Agent handle: 0x10c4a90) reason :GPU Hang +✖ ! 
[rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log new file mode 100644 index 0000000..d3b262b --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.75 ± 0.35 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log index a664b0b..aaaffba 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 82.31 ± 0.29 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.99 ± 0.01 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 91.54 ± 0.50 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 
(6119) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log new file mode 100644 index 0000000..18449f7 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.61 ± 0.31 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log index 8ab75d9..9e9f25c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 83.18 ± 0.41 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.99 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 55.68 ± 0.47 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 3.11 ± 0.98 | -build: 4cb208c9 (6066) +build: cd6983d5 (6119) diff --git 
a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..f7ce012 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.08 ± 0.42 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log index 45f0b37..d74242d 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log @@ -4,5 +4,5 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | ggml_vulkan: Device memory allocation of size 2819260416 failed. ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' +main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' ✖ ! 
[vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 1) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..a667917 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +ggml_vulkan: Device memory allocation of size 2819260416 failed. +ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory +main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' +✖ ! 
[vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 failed (exit 1) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log index 0dccabf..0c3a407 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 135.40 ± 0.29 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 3.98 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | pp512 | 135.58 ± 0.45 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | tg128 | 4.00 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..f2077af --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 
| 0 | pp512 | 138.61 ± 0.55 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | tg128 | 4.00 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..34e7e86 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 729.91 ± 1.22 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.14 ± 0.03 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..16dd036 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 752.25 ± 0.73 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.93 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log 
b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log index 059fbe3..f07fba3 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.02 ± 0.82 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.04 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 730.51 ± 1.49 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.35 ± 0.02 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log new file mode 100644 index 0000000..08d39fe --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 645.88 ± 0.61 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.63 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log index 67c76bf..ea76e52 100644 --- 
a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.93 ± 1.29 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.52 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 732.13 ± 1.42 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.23 ± 0.03 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log new file mode 100644 index 0000000..76e9619 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 652.29 ± 0.45 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.62 ± 0.02 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log index 7fb6f1c..ce94640 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log @@ -4,7 
+4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 728.63 ± 1.23 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.59 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 730.59 ± 1.69 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.01 ± 0.03 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log new file mode 100644 index 0000000..4c71363 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 646.16 ± 0.39 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.53 ± 0.02 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log index 2cfca97..b707702 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | 
uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1616.55 ± 4.61 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 83.89 ± 0.22 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1614.72 ± 4.91 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 84.00 ± 0.23 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..6055d96 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 942.34 ± 1.76 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 57.70 ± 0.22 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log index 1e319e2..5a56858 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV 
GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1520.07 ± 5.39 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 85.93 ± 0.09 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1527.75 ± 3.86 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 85.54 ± 0.99 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log new file mode 100644 index 0000000..ab5608b --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 1489.57 ± 4.71 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 80.63 ± 0.22 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..2301d16 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
+ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 355.01 ± 0.57 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.66 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..dbb739d --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 411.33 ± 1.01 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.50 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log index 8f074d4..fc1ded3 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x394d3570) reason :GPU Hang -✖ ! 
[rocm6_4_2] gpt-oss-120b-F16 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.36 ± 0.53 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 31.90 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log new file mode 100644 index 0000000..a62923c --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 247.95 ± 0.40 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.04 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log index 035fbc5..4e2c281 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 357.68 ± 1.49 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | 
ROCm | 99 | 0 | tg128 | 33.70 ± 0.01 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 357.38 ± 0.76 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.62 ± 0.00 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log new file mode 100644 index 0000000..707b558 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 249.65 ± 0.33 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.04 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log index 6e2747e..63dd9d9 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.47 ± 0.55 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.65 ± 0.00 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 356.67 ± 0.74 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | 
tg128 | 33.68 ± 0.02 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log new file mode 100644 index 0000000..8096c36 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 247.49 ± 0.65 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.07 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log index 10f4c58..755a9cf 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 449.22 ± 1.12 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.49 ± 0.05 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 448.17 ± 1.37 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.39 ± 0.03 | -build: 0d883154 (6101) +build: cd6983d5 
(6119) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..152170f --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 498.69 ± 2.19 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.06 ± 0.03 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log index 9d49924..5ab95e4 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 230.32 ± 0.72 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.06 ± 0.02 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 229.59 ± 0.74 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.08 ± 0.01 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git 
a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log new file mode 100644 index 0000000..9d830ae --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 243.40 ± 0.99 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.07 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..3f432b9 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.53 ± 0.62 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.05 ± 0.08 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 
0000000..fa4767b --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 408.50 ± 1.91 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.69 ± 0.18 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log index 378cd44..c1f2f78 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 352.53 ± 1.06 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 43.56 ± 0.00 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.45 ± 1.22 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 44.12 ± 0.01 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log new file mode 100644 index 0000000..769bedc 
--- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 246.76 ± 0.35 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 43.67 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log index 784987e..3892e39 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x299852d0) reason :GPU Hang -✖ ! 
[rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 354.82 ± 1.02 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.00 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log new file mode 100644 index 0000000..69476e2 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 248.22 ± 0.50 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.05 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log index f5968ae..3a57ced 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | 
--------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 351.08 ± 0.86 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 44.63 ± 0.03 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.20 ± 0.59 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.15 ± 0.01 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log new file mode 100644 index 0000000..93e7fca --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log index 2dc8a85..d229658 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 485.98 ± 2.23 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 48.09 ± 0.04 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB 
| 116.83 B | Vulkan | 999 | 0 | pp512 | 486.90 ± 2.23 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 48.08 ± 0.03 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..b556c96 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 546.41 ± 2.88 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 47.25 ± 0.02 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log index 19e9a00..802c652 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 239.16 ± 1.26 | -| gpt-oss ?B 
MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 48.93 ± 0.06 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 239.72 ± 1.23 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 49.01 ± 0.06 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..6b8a8c4 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 255.17 ± 1.65 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 48.93 ± 0.02 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..e1b0205 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 324.54 ± 4.39 | +| 
gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.87 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..8e851e8 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 380.87 ± 8.21 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.79 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log index ffdd70e..d9bd7eb 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 323.64 ± 4.29 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.64 ± 0.06 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 323.86 ± 4.33 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.27 ± 0.00 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git 
a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log new file mode 100644 index 0000000..266806c --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 257.11 ± 2.63 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.47 ± 0.08 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log b/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log index 40fd017..d76138e 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.15 ± 3.76 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.90 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 322.43 ± 2.59 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.89 ± 0.00 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log new file mode 100644 index 0000000..6dd4954 --- /dev/null +++ 
b/benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 254.08 ± 3.99 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.62 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log index 272ca13..67b820b 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.27 ± 5.39 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.86 ± 0.00 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 319.36 ± 3.07 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.88 ± 0.00 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log new file mode 100644 index 0000000..e07a069 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: 
+ Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 254.87 ± 2.27 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.62 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log index e8395dc..52536d1 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 369.86 ± 1.57 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 8.59 ± 0.01 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 369.69 ± 1.79 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 8.59 ± 0.01 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..974e845 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 
1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 389.86 ± 2.13 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 8.58 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log index 41c0d3f..7decf08 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 318.82 ± 1.63 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 7.77 ± 0.01 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 319.09 ± 1.46 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 7.79 ± 0.01 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log new file mode 100644 index 0000000..a9ce691 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | 
backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 335.15 ± 1.80 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 7.79 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..c377132 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 580.83 ± 2.46 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.47 ± 0.02 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..cb2c45b --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 
| 0 | pp512 | 649.48 ± 3.21 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 64.18 ± 0.02 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log index 03d8cf8..343b2b0 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 580.67 ± 2.03 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.26 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 582.89 ± 2.32 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.45 ± 0.02 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log new file mode 100644 index 0000000..34d817d --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 394.67 ± 1.08 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 62.97 ± 0.00 | + +build: cd6983d5 
(6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log index a4f7d4c..441cec1 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.04 ± 2.48 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.37 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 583.52 ± 2.76 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.39 ± 0.01 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log new file mode 100644 index 0000000..e5f1e99 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 396.75 ± 0.60 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 62.98 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log index 
7584083..97fab79 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.15 ± 2.11 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.38 ± 0.01 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 581.83 ± 1.10 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.50 ± 0.02 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log new file mode 100644 index 0000000..3e34f41 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 394.87 ± 0.73 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.06 ± 0.01 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log index 60e3b9f..2d4b788 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log +++ 
b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1206.08 ± 8.80 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 68.90 ± 0.18 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 1205.02 ± 7.18 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 68.84 ± 0.04 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..9a5c4c5 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 1472.56 ± 14.39 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 67.78 ± 0.18 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log index d9302e5..f400d0f 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log 
+++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 646.77 ± 4.63 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 69.82 ± 0.03 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 648.85 ± 6.28 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 69.88 ± 0.04 | -build: 0d883154 (6101) +build: cd6983d5 (6119) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log new file mode 100644 index 0000000..1959c7e --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 728.38 ± 8.17 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 69.80 ± 0.05 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log new file mode 100644 index 0000000..e9da9da --- /dev/null +++ 
b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.47 ± 0.04 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.62 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log new file mode 100644 index 0000000..0388774 --- /dev/null +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 34.51 ± 0.02 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.61 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log index cd91f9d..01f32df 100644 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 
32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.89 ± 0.03 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 4.59 ± 0.00 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.79 ± 0.03 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.52 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log new file mode 100644 index 0000000..f9ae86b --- /dev/null +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.67 ± 0.04 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log index cdd01d1..f6959d1 100644 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | 
---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.91 ± 0.04 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 4.60 ± 0.00 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.88 ± 0.02 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.61 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log new file mode 100644 index 0000000..2869c45 --- /dev/null +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.67 ± 0.02 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log index 782d37e..6bd1b01 100644 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B 
Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.82 ± 0.05 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 4.52 ± 0.00 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.91 ± 0.03 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.61 ± 0.00 | -build: 4cb208c9 (6066) +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log new file mode 100644 index 0000000..77dd920 --- /dev/null +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.66 ± 0.04 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log index 2755187..bc604f8 100644 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| 
llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 72.75 ± 0.03 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 5.01 ± 0.00 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 72.75 ± 0.02 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.03 ± 0.00 | -build: 9c35706b (6060) +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..7ac44cb --- /dev/null +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 73.57 ± 0.02 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 5.00 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log index b827d6f..4cc5212 100644 --- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | 
--------------: | -------------------: | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 79.12 ± 0.14 | -| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 4.97 ± 0.00 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 78.99 ± 0.18 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.00 ± 0.00 | -build: 66625a59 (6040) +build: cd6983d5 (6119) diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log new file mode 100644 index 0000000..869327e --- /dev/null +++ b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 80.92 ± 0.05 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 4.99 ± 0.00 | + +build: cd6983d5 (6119) diff --git a/benchmark/results/run_benchmarks.log b/benchmark/results/run_benchmarks.log new file mode 100644 index 0000000..073dde1 --- /dev/null +++ b/benchmark/results/run_benchmarks.log @@ -0,0 +1,1153 @@ +Found 18 model(s) to bench: + • /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf + • /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf + • /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf + • /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf + • /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf + • /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf + • 
/mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf + • /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf + • /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf + • /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf + • /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf + • /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf + • /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf + • /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf + • /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf + • /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf + • /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf + • /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf + + +▶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf + + +▶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 __fa1 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 + + +▶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf + + +▶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 __fa1 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log + → cmd: toolbox run 
-c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf + + +▶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 __fa1 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf + + +▶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 __fa1 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf + + * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 : FAILED + +▶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 + + * [vulkan_amdvlk] 
gemma-3-27b-it-BF16-00001-of-00002 __fa1 : FAILED + +▶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf + + * [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 : FAILED + +▶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 __fa1 + → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1 + + +▶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL + → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf + + +▶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL __fa1 + → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 + + +▶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL + → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf + + +▶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL __fa1 + → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL + → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf + + +▶ [rocm6_4_2-rocwmma] 
gemma-3-12b-it-UD-Q8_K_XL __fa1 + → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 + + +▶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL + → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf + + +▶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL __fa1 + → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 + + +▶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL + → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf + + +▶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL __fa1 + → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 + + +▶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL + → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf + + +▶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL __fa1 + → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1 + + +▶ [rocm7_rc] gemma-3-4b-it-Q3_K_S + → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf + + +▶ [rocm7_rc] gemma-3-4b-it-Q3_K_S __fa1 + → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 + + +▶ [rocm7_beta] gemma-3-4b-it-Q3_K_S + → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf + + +▶ [rocm7_beta] gemma-3-4b-it-Q3_K_S __fa1 + → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S + → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf + + +▶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S __fa1 + → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 + + +▶ [vulkan_radv] gemma-3-4b-it-Q3_K_S + → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf + + +▶ [vulkan_radv] gemma-3-4b-it-Q3_K_S __fa1 + → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 + + +▶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S + → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf 
+ + +▶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S __fa1 + → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 + + +▶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S + → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf + + +▶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S __fa1 + → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1 + + +▶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf + + +▶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf + + * [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 : FAILED + +▶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 
1 + + * [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf + + * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 : FAILED + +▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf + + +▶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf + + +▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench 
-ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf + + +▶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf + + +▶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 + + * [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED + +▶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf + + +▶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 + + * [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED + +▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf + + +▶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 + + * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED + +▶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf + + +▶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 + + +▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf + + +▶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- 
/usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 + + +▶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf + + +▶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 + → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1 + + * [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED + +▶ [rocm7_rc] gpt-oss-120b-F16 + → log: results/gpt-oss-120b-F16__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf + + +▶ [rocm7_rc] gpt-oss-120b-F16 __fa1 + → log: results/gpt-oss-120b-F16__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 + + +▶ [rocm7_beta] gpt-oss-120b-F16 + → log: results/gpt-oss-120b-F16__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf + + +▶ [rocm7_beta] gpt-oss-120b-F16 __fa1 + → log: results/gpt-oss-120b-F16__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16 + → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf + + +▶ [rocm6_4_2-rocwmma] 
gpt-oss-120b-F16 __fa1 + → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 + + +▶ [vulkan_radv] gpt-oss-120b-F16 + → log: results/gpt-oss-120b-F16__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf + + +▶ [vulkan_radv] gpt-oss-120b-F16 __fa1 + → log: results/gpt-oss-120b-F16__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 + + +▶ [vulkan_amdvlk] gpt-oss-120b-F16 + → log: results/gpt-oss-120b-F16__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf + + +▶ [vulkan_amdvlk] gpt-oss-120b-F16 __fa1 + → log: results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 + + +▶ [rocm6_4_2] gpt-oss-120b-F16 + → log: results/gpt-oss-120b-F16__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf + + +▶ [rocm6_4_2] gpt-oss-120b-F16 __fa1 + → log: results/gpt-oss-120b-F16__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1 + + +▶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf + + +▶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 + → log: 
results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 + + * [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 : FAILED + +▶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf + + +▶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf + + +▶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 + + +▶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf + + +▶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 + + +▶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf + + +▶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 + + +▶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf + + +▶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 + → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1 + + +▶ [rocm7_rc] gpt-oss-20b-F32 + → log: results/gpt-oss-20b-F32__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf + + +▶ [rocm7_rc] gpt-oss-20b-F32 __fa1 + → log: results/gpt-oss-20b-F32__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 + + +▶ [rocm7_beta] gpt-oss-20b-F32 + → log: results/gpt-oss-20b-F32__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf + + +▶ [rocm7_beta] gpt-oss-20b-F32 __fa1 + → log: results/gpt-oss-20b-F32__rocm7_beta__fa1.log + → cmd: toolbox run -c 
llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32 + → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf + + +▶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32 __fa1 + → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 + + +▶ [vulkan_radv] gpt-oss-20b-F32 + → log: results/gpt-oss-20b-F32__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf + + +▶ [vulkan_radv] gpt-oss-20b-F32 __fa1 + → log: results/gpt-oss-20b-F32__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 + + +▶ [vulkan_amdvlk] gpt-oss-20b-F32 + → log: results/gpt-oss-20b-F32__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf + + +▶ [vulkan_amdvlk] gpt-oss-20b-F32 __fa1 + → log: results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 + + +▶ [rocm6_4_2] gpt-oss-20b-F32 + → log: results/gpt-oss-20b-F32__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf + + +▶ [rocm6_4_2] gpt-oss-20b-F32 __fa1 + → log: results/gpt-oss-20b-F32__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1 + + +▶ 
[rocm7_rc] gpt-oss-20b-mxfp4 + → log: results/gpt-oss-20b-mxfp4__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf + + +▶ [rocm7_rc] gpt-oss-20b-mxfp4 __fa1 + → log: results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 + + +▶ [rocm7_beta] gpt-oss-20b-mxfp4 + → log: results/gpt-oss-20b-mxfp4__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf + + +▶ [rocm7_beta] gpt-oss-20b-mxfp4 __fa1 + → log: results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4 + → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf + + +▶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4 __fa1 + → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 + + +▶ [vulkan_radv] gpt-oss-20b-mxfp4 + → log: results/gpt-oss-20b-mxfp4__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf + + +▶ [vulkan_radv] gpt-oss-20b-mxfp4 __fa1 + → log: results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 + + +▶ [vulkan_amdvlk] gpt-oss-20b-mxfp4 + → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log + → 
cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf + + +▶ [vulkan_amdvlk] gpt-oss-20b-mxfp4 __fa1 + → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 + + +▶ [rocm6_4_2] gpt-oss-20b-mxfp4 + → log: results/gpt-oss-20b-mxfp4__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf + + +▶ [rocm6_4_2] gpt-oss-20b-mxfp4 __fa1 + → log: results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1 + + +▶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf + + +▶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf + + * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED + +▶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log + → cmd: toolbox 
run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf + + * [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED + +▶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf + + +▶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf + + * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED + +▶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf + + * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED + +▶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf + + * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED + +▶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf + + * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED + +▶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf + + * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED + +▶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [vulkan_radv] 
Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf + + +▶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf + + +▶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf + + +▶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 + → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [rocm7_rc] llama3.3-70.6B-Q4_K_M + → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf + + +▶ [rocm7_rc] llama3.3-70.6B-Q4_K_M __fa1 + → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 + + +▶ [rocm7_beta] llama3.3-70.6B-Q4_K_M + → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf + + +▶ [rocm7_beta] llama3.3-70.6B-Q4_K_M __fa1 + → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M + → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf + + +▶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M __fa1 + → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 + + +▶ [vulkan_radv] llama3.3-70.6B-Q4_K_M + → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf + + +▶ [vulkan_radv] llama3.3-70.6B-Q4_K_M __fa1 + → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log + 
→ cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 + + +▶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M + → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf + + +▶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M __fa1 + → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 + + +▶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M + → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf + + +▶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M __fa1 + → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1 + + +▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf + + * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED + +▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [rocm7_beta] 
Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf + + +▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf + + * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED + +▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf + + +▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf + + +▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf + + +▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2] 
Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED + +▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf + + +▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 + + * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED + +▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf + + * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED + +▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 + + * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED + +▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf + + * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED + +▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED + +▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf + + +▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf + + +▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf + + +▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1 + + * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED + +▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf + + * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED + +▶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 + + * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED + +▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf + + * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED + +▶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 + + * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED + +▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf + + * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED + +▶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 + + * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED + +▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf + + +▶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 + → log: 
results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 + + +▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf + + +▶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 + + +▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf + + +▶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 + → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1 + + * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED + +▶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf + + * [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED + +▶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 + + * [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED + +▶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf + + * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED + +▶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 + + * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED + +▶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf + + * [rocm6_4_2-rocwmma] 
Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED + +▶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 + + * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED + +▶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf + + +▶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 + + +▶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf + + +▶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 + + +▶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf + + +▶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 + → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1 + + * [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED + +▶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf + + +▶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 + + +▶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf + + +▶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 
-m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf + + +▶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf + + +▶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf + + +▶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log + → cmd: 
toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf + + +▶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1 + + +▶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf + + +▶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log + → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 + + * [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED + +▶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf + + +▶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log + → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 + + * [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED + +▶ [rocm6_4_2-rocwmma] 
Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf + + +▶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf + + +▶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log + → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 + + +▶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf + + +▶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log + → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m 
/mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 + + +▶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf + + +▶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 + → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log + → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1 + diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..5e3d8f8 --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x7f5e570) on address 0x7f3192c0f000. Reason: Page not present or supervisor privilege. +✖ ! 
[rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..c3f4dab --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x16bd82e0) reason :GPU Hang +✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..4a8358a --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 129.20 ± 0.38 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.61 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..a5b862a --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ 
+ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 199.54 ± 0.38 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 22.75 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log new file mode 100644 index 0000000..b242732 --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 128.00 ± 0.23 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 22.88 ± 0.02 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log new file mode 100644 index 0000000..8519a29 --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
+ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 124.86 ± 0.54 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.27 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log new file mode 100644 index 0000000..4391361 --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2a5da2e0) reason :GPU Hang +✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log new file mode 100644 index 0000000..d44f4f5 --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! 
[rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log new file mode 100644 index 0000000..0fb7ad2 --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 221.02 ± 0.58 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.47 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log new file mode 100644 index 0000000..9f7f467 --- /dev/null +++ b/benchmark/results_old/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.86 ± 0.40 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.76 ± 0.00 | + +build: 0d883154 (6101) diff --git 
a/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..3cb770a --- /dev/null +++ b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x68b7b10) reason :GPU Hang +✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..d0101aa --- /dev/null +++ b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x1587b430) reason :GPU Hang +✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..d77c334 --- /dev/null +++ b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! 
[rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..7d3b718 --- /dev/null +++ b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +ggml_vulkan: Device memory allocation of size 2491416576 failed. +ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory +main: error: failed to load model '/home/kyuz0/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf' +✖ ! 
[vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 1) diff --git a/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log new file mode 100644 index 0000000..f9cf4af --- /dev/null +++ b/benchmark/results_old/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 99 | 0 | pp512 | 76.48 ± 0.23 | +| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 99 | 0 | tg128 | 2.65 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..1d1603d --- /dev/null +++ b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.17 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.72 ± 0.00 | + +build: 66625a59 (6040) diff --git 
a/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..0d64f23 --- /dev/null +++ b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0xa5e9440) reason :GPU Hang +✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..b9ba150 --- /dev/null +++ b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! 
[rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..747dc38 --- /dev/null +++ b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 96.23 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.72 ± 0.00 | + +build: 9c35706b (6060) diff --git a/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log new file mode 100644 index 0000000..22a6a30 --- /dev/null +++ b/benchmark/results_old/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 79.71 ± 0.13 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.72 ± 0.00 | + +build: 66625a59 (6040) diff --git 
a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..33fcb65 --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 121.52 ± 0.98 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.28 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..535626f --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x27159430) reason :GPU Hang +✖ ! 
[rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..995bcbd --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 135.36 ± 0.39 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.29 ± 0.00 | + +build: 4cb208c9 (6066) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..f1d30fc --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 243.19 ± 1.20 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 
| 15.28 ± 0.03 | + +build: 9c35706b (6060) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log new file mode 100644 index 0000000..89e18de --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 137.97 ± 0.99 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.07 ± 0.05 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log new file mode 100644 index 0000000..b7b6ab3 --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2b17db10) reason :GPU Hang +✖ ! 
[rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log new file mode 100644 index 0000000..4981e9d --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x1a77430) reason :GPU Hang +✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log new file mode 100644 index 0000000..9c06e2b --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! 
[rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log new file mode 100644 index 0000000..cda78f6 --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 238.93 ± 2.89 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.25 ± 0.01 | + +build: 9c35706b (6060) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log new file mode 100644 index 0000000..6a5f1fb --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 145.86 ± 2.44 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | 
Vulkan | 99 | 0 | tg128 | 12.27 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..5242c84 --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 132.66 ± 0.56 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.29 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..c8275dd --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 133.71 ± 0.64 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB 
| 107.77 B | ROCm | 99 | 0 | tg128 | 17.35 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..2e1a6fc --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -0,0 +1,5 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..72a362e --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 208.84 ± 1.35 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.06 ± 0.01 | + +build: 9c35706b (6060) diff --git a/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log new file mode 
100644 index 0000000..71adfea --- /dev/null +++ b/benchmark/results_old/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 133.49 ± 1.83 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 19.99 ± 0.01 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log new file mode 100644 index 0000000..c21206d --- /dev/null +++ b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 69.48 ± 0.09 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.54 ± 0.01 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log 
new file mode 100644 index 0000000..6cb77a4 --- /dev/null +++ b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x1a8d440) reason :GPU Hang +✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log new file mode 100644 index 0000000..2421a1b --- /dev/null +++ b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 74.69 ± 0.17 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.56 ± 0.00 | + +build: 4cb208c9 (6066) diff --git a/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log new file mode 100644 index 0000000..dba1565 --- /dev/null +++ b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD 
open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 99.94 ± 0.91 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 15.72 ± 0.01 | + +build: 9c35706b (6060) diff --git a/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log new file mode 100644 index 0000000..11f7672 --- /dev/null +++ b/benchmark/results_old/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 58.40 ± 0.21 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 16.29 ± 0.01 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..e10adbb --- /dev/null +++ b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + 
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 157.74 ± 2.65 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 22.88 ± 0.01 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..bb3fa29 --- /dev/null +++ b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 151.25 ± 3.33 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 23.80 ± 0.09 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..47cba1d --- /dev/null +++ b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | 
---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 154.95 ± 1.58 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 23.08 ± 0.08 | + +build: 4cb208c9 (6066) diff --git a/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..c75a868 --- /dev/null +++ b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 90.91 ± 0.35 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.96 ± 0.03 | + +build: 9c35706b (6060) diff --git a/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log new file mode 100644 index 0000000..ef72d92 --- /dev/null +++ b/benchmark/results_old/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 
| 71.16 ± 0.92 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.33 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..445b37e --- /dev/null +++ b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 150.53 ± 1.83 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 22.13 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..1204c49 --- /dev/null +++ b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 147.31 ± 2.22 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.12 ± 
0.06 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..b366cae --- /dev/null +++ b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 144.59 ± 3.08 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 23.48 ± 0.01 | + +build: 4cb208c9 (6066) diff --git a/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..33fe404 --- /dev/null +++ b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 90.38 ± 0.57 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.00 ± 0.03 | + +build: 9c35706b (6060) diff --git 
a/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log new file mode 100644 index 0000000..cd83c90 --- /dev/null +++ b/benchmark/results_old/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 71.53 ± 1.06 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.34 ± 0.01 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log new file mode 100644 index 0000000..21a2b99 --- /dev/null +++ b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 223.36 ± 0.23 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.81 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log new file mode 100644 index 
0000000..fc2cc5b --- /dev/null +++ b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.95 ± 0.15 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.80 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log new file mode 100644 index 0000000..acf4970 --- /dev/null +++ b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.99 ± 0.24 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.81 ± 0.00 | + +build: 4cb208c9 (6066) diff --git a/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log new file mode 100644 index 0000000..2ba5269 --- /dev/null +++ b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp 
size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 683.07 ± 1.03 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 13.84 ± 0.02 | + +build: 9c35706b (6060) diff --git a/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log new file mode 100644 index 0000000..5d31829 --- /dev/null +++ b/benchmark/results_old/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 508.55 ± 0.90 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 13.65 ± 0.02 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log new file mode 100644 index 0000000..bbf9e04 --- /dev/null +++ b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: 
| ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 88.73 ± 0.50 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.02 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log new file mode 100644 index 0000000..a664b0b --- /dev/null +++ b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 82.31 ± 0.29 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.99 ± 0.01 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..8ab75d9 --- /dev/null +++ b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 83.18 ± 0.41 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.99 ± 0.00 | + 
+build: 4cb208c9 (6066) diff --git a/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log new file mode 100644 index 0000000..45f0b37 --- /dev/null +++ b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +ggml_vulkan: Device memory allocation of size 2819260416 failed. +ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory +main: error: failed to load model '/home/kyuz0/models/gemma-3-27b-it-BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' +✖ ! 
[vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 1) diff --git a/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log new file mode 100644 index 0000000..0dccabf --- /dev/null +++ b/benchmark/results_old/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 135.40 ± 0.29 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 3.98 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log new file mode 100644 index 0000000..059fbe3 --- /dev/null +++ b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.02 ± 0.82 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.04 ± 0.03 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm7_beta.log b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm7_beta.log new file mode 100644 
index 0000000..67c76bf --- /dev/null +++ b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.93 ± 1.29 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.52 ± 0.03 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm7_rc.log new file mode 100644 index 0000000..7fb6f1c --- /dev/null +++ b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 728.63 ± 1.23 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.59 ± 0.03 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log new file mode 100644 index 0000000..2cfca97 --- /dev/null +++ b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | 
shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1616.55 ± 4.61 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 83.89 ± 0.22 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__vulkan_radv.log new file mode 100644 index 0000000..1e319e2 --- /dev/null +++ b/benchmark/results_old/gemma-3-4b-it-Q3_K_S__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1520.07 ± 5.39 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 85.93 ± 0.09 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-120b-F16__rocm6_4_2.log b/benchmark/results_old/gpt-oss-120b-F16__rocm6_4_2.log new file mode 100644 index 0000000..8f074d4 --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-F16__rocm6_4_2.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x394d3570) reason :GPU Hang +✖ ! 
[rocm6_4_2] gpt-oss-120b-F16 failed (exit 134) diff --git a/benchmark/results_old/gpt-oss-120b-F16__rocm7_beta.log b/benchmark/results_old/gpt-oss-120b-F16__rocm7_beta.log new file mode 100644 index 0000000..035fbc5 --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-F16__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 357.68 ± 1.49 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.70 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results_old/gpt-oss-120b-F16__rocm7_rc.log new file mode 100644 index 0000000..6e2747e --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-F16__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.47 ± 0.55 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.65 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results_old/gpt-oss-120b-F16__vulkan_amdvlk.log new file mode 100644 index 0000000..10f4c58 --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-F16__vulkan_amdvlk.log @@ -0,0 +1,8 @@ 
+ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 449.22 ± 1.12 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.49 ± 0.05 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results_old/gpt-oss-120b-F16__vulkan_radv.log new file mode 100644 index 0000000..9d49924 --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-F16__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 230.32 ± 0.72 | +| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.06 ± 0.02 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log new file mode 100644 index 0000000..378cd44 --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | 
backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 352.53 ± 1.06 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 43.56 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log new file mode 100644 index 0000000..784987e --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x299852d0) reason :GPU Hang +✖ ! [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 failed (exit 134) diff --git a/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log new file mode 100644 index 0000000..f5968ae --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 351.08 ± 0.86 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 44.63 ± 0.03 | + +build: 0d883154 (6101) diff --git 
a/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log new file mode 100644 index 0000000..2dc8a85 --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 485.98 ± 2.23 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 48.09 ± 0.04 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log new file mode 100644 index 0000000..19e9a00 --- /dev/null +++ b/benchmark/results_old/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 239.16 ± 1.26 | +| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 48.93 ± 0.06 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-F32__rocm6_4_2.log b/benchmark/results_old/gpt-oss-20b-F32__rocm6_4_2.log new file mode 100644 index 
0000000..ffdd70e --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-F32__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 323.64 ± 4.29 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.64 ± 0.06 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-F32__rocm7_beta.log b/benchmark/results_old/gpt-oss-20b-F32__rocm7_beta.log new file mode 100644 index 0000000..40fd017 --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-F32__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.15 ± 3.76 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.90 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results_old/gpt-oss-20b-F32__rocm7_rc.log new file mode 100644 index 0000000..272ca13 --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-F32__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | 
params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.27 ± 5.39 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.86 ± 0.00 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results_old/gpt-oss-20b-F32__vulkan_amdvlk.log new file mode 100644 index 0000000..e8395dc --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-F32__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 369.86 ± 1.57 | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 8.59 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results_old/gpt-oss-20b-F32__vulkan_radv.log new file mode 100644 index 0000000..41c0d3f --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-F32__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 318.82 ± 1.63 | +| gpt-oss ?B BF16 | 
38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 7.77 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-mxfp4__rocm6_4_2.log b/benchmark/results_old/gpt-oss-20b-mxfp4__rocm6_4_2.log new file mode 100644 index 0000000..03d8cf8 --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-mxfp4__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 580.67 ± 2.03 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.26 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-mxfp4__rocm7_beta.log b/benchmark/results_old/gpt-oss-20b-mxfp4__rocm7_beta.log new file mode 100644 index 0000000..a4f7d4c --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-mxfp4__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.04 ± 2.48 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.37 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results_old/gpt-oss-20b-mxfp4__rocm7_rc.log new file mode 100644 index 0000000..7584083 --- /dev/null +++ 
b/benchmark/results_old/gpt-oss-20b-mxfp4__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.15 ± 2.11 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.38 ± 0.01 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results_old/gpt-oss-20b-mxfp4__vulkan_amdvlk.log new file mode 100644 index 0000000..60e3b9f --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-mxfp4__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1206.08 ± 8.80 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 68.90 ± 0.18 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results_old/gpt-oss-20b-mxfp4__vulkan_radv.log new file mode 100644 index 0000000..d9302e5 --- /dev/null +++ b/benchmark/results_old/gpt-oss-20b-mxfp4__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | 
matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 646.77 ± 4.63 | +| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 69.82 ± 0.03 | + +build: 0d883154 (6101) diff --git a/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log new file mode 100644 index 0000000..cd91f9d --- /dev/null +++ b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.89 ± 0.03 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 4.59 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm7_beta.log b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm7_beta.log new file mode 100644 index 0000000..cdd01d1 --- /dev/null +++ b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm7_beta.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 
70.55 B | ROCm | 99 | 0 | pp512 | 33.91 ± 0.04 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 4.60 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm7_rc.log b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm7_rc.log new file mode 100644 index 0000000..782d37e --- /dev/null +++ b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__rocm7_rc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.82 ± 0.05 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 4.52 ± 0.00 | + +build: 4cb208c9 (6066) diff --git a/benchmark/results_old/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log new file mode 100644 index 0000000..2755187 --- /dev/null +++ b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 72.75 ± 0.03 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 5.01 ± 0.00 | + +build: 9c35706b (6060) diff --git a/benchmark/results_old/llama3.3-70.6B-Q4_K_M__vulkan_radv.log 
b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__vulkan_radv.log new file mode 100644 index 0000000..b827d6f --- /dev/null +++ b/benchmark/results_old/llama3.3-70.6B-Q4_K_M__vulkan_radv.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 79.12 ± 0.14 | +| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 4.97 ± 0.00 | + +build: 66625a59 (6040) diff --git a/benchmark/run_benchmarks.sh b/benchmark/run_benchmarks.sh index 64557c3..3254fcf 100755 --- a/benchmark/run_benchmarks.sh +++ b/benchmark/run_benchmarks.sh @@ -38,25 +38,36 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do for ENV in "${!CMDS[@]}"; do CMD="${CMDS[$ENV]}" - OUT="$RESULTDIR/${MODEL_NAME}__${ENV}.log" - # skip if we already have a non-empty log - if [[ -s "$OUT" ]]; then - echo "⏩ Skipping [${ENV}] ${MODEL_NAME}, log already exists at $OUT" - continue - fi + # run twice: baseline and with flash attention + for FA in 0 1; do + SUFFIX="" + EXTRA_ARGS=() + if (( FA == 1 )); then + SUFFIX="__fa1" + EXTRA_ARGS=( -fa 1 ) + fi - # build command array - FULL_CMD=( $CMD -ngl 99 -mmp 0 -m "$MODEL_PATH" ) + OUT="$RESULTDIR/${MODEL_NAME}__${ENV}${SUFFIX}.log" - printf "\n▶ [%s] %s\n" "$ENV" "$MODEL_NAME" - printf " → log: %s\n" "$OUT" - printf " → cmd: %s\n\n" "${FULL_CMD[*]}" + # skip if we already have a non-empty log + if [[ -s "$OUT" ]]; then + echo "⏩ Skipping [${ENV}] ${MODEL_NAME}${SUFFIX:+ ($SUFFIX)}, log already exists at $OUT" + continue + fi - # execute - "${FULL_CMD[@]}" >"$OUT" 2>&1 || { - echo "✖ ! 
[${ENV}] ${MODEL_NAME} failed (exit $?)" >>"$OUT" - echo " * [${ENV}] ${MODEL_NAME} : FAILED" - } + # build command array + FULL_CMD=( $CMD -ngl 99 -mmp 0 -m "$MODEL_PATH" "${EXTRA_ARGS[@]}" ) + + printf "\n▶ [%s] %s%s\n" "$ENV" "$MODEL_NAME" "${SUFFIX:+ $SUFFIX}" + printf " → log: %s\n" "$OUT" + printf " → cmd: %s\n\n" "${FULL_CMD[*]}" + + # execute + "${FULL_CMD[@]}" >"$OUT" 2>&1 || { + echo "✖ ! [${ENV}] ${MODEL_NAME}${SUFFIX:+ $SUFFIX} failed (exit $?)" >>"$OUT" + echo " * [${ENV}] ${MODEL_NAME}${SUFFIX:+ $SUFFIX} : FAILED" + } + done done done diff --git a/pages/index.html b/pages/index.html deleted file mode 100644 index 8f352fa..0000000 --- a/pages/index.html +++ /dev/null @@ -1,584 +0,0 @@ - - - - - - - Strix Halo — Model ↔ Backend Comparator - - - - -
-

Strix Halo — llama.cpp Backend Comparator

-

- Compare model throughput across backends (pp512 & tg128). - Repo: kyuz0/amd-strix-halo-toolboxes -

-

Loading meta…

-
- - -
-
- - -
-
- - -
-
- -
- - -
-
- -
-
- - -
-
- -
-
Winner = every selected backend within the best’s uncertainty range, combining ± errors from both results.
- -
-

Prompt Processing (pp512) — tokens/second

-
-
-
- - - - - -
-
-
- -
-

Text Generation (tg128) — tokens/second

-
-
-
- - - - - -
-
-
- - - - - \ No newline at end of file diff --git a/pages/results.json b/pages/results.json deleted file mode 100644 index 15af715..0000000 --- a/pages/results.json +++ /dev/null @@ -1,9939 +0,0 @@ -{ - "meta": { - "generated_at": "2025-08-09T09:04:36Z", - "os_kernel": "Fedora 42 \u2014 Linux fedora 6.16.0-264.vanilla.fc42.x86_64 (2025-07-28)", - "llamacpp_builds": [ - { - "hash": "cd6983d5", - "number": "6119" - } - ], - "environments": [ - "rocm6_4_2", - "rocm6_4_2-rocwmma", - "rocm7_beta", - "rocm7_rc", - "vulkan_amdvlk", - "vulkan_radv" - ], - "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second" - }, - "runs": [ - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 129.88, - "tps_std": 0.57, - "error": false, 
- "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 19.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": 
"GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 130.17, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 19.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 103.63, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": 
"Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 20.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 200.76, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 22.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - 
"env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 201.86, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 22.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 127.73, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 22.88, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - 
"name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 132.54, - "tps_std": 0.34, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 23.31, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 113.62, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": 
"GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 15.47, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 124.82, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, 
- "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 120.54, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - 
"env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 124.18, - "tps_std": 0.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", - 
"build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 223.02, - "tps_std": 0.69, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 16.47, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 224.54, - "tps_std": 0.65, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 16.49, - 
"tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 127.36, - "tps_std": 0.46, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 16.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 131.78, - "tps_std": 0.46, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": 
"results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 16.99, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": 
null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": 
"Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 33.3, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 31.09, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - 
"name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": null - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 78.7, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": 
null, - "fa": false, - "test": "tg128", - "tps_mean": 2.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 81.29, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 72.71, - "file_size_gib": 78.21, - "name_params_b": 72.0, - "quant": "Q8_K_XL", - "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - 
"quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 33.32, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": 
"Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 31.28, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - 
"ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 98.14, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": 
"Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 99.24, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - 
"env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 80.11, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 82.9, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - 
"params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 134.39, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - 
}, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": 
true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 135.25, - "tps_std": 0.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": 
"results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 243.45, - "tps_std": 1.29, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.29, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 247.48, - "tps_std": 1.28, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": 
"Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 148.25, - "tps_std": 0.91, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.21, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - 
"env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 149.82, - "tps_std": 0.83, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.21, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - 
"params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 135.44, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 11.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": 
"Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - 
"mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 258.18, - "tps_std": 1.38, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 12.23, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 260.16, - "tps_std": 1.44, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", - 
"build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 12.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 168.63, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 12.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": 
"Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 172.37, - "tps_std": 0.92, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 12.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 17.0, - "quant": null, - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": 
null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 138.27, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - 
"name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 138.9, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 123.61, - "tps_std": 0.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": 
"cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 123.58, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": 
"Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 218.18, - "tps_std": 0.83, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.04, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - 
"env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 221.15, - "tps_std": 0.74, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 19.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 152.21, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 19.98, - "tps_std": 0.01, 
- "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 155.22, - "tps_std": 1.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 19.93, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - 
"params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 74.15, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 13.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log", - "build": { - 
"hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": 
false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 114.49, - "tps_std": 0.6, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - 
"name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 116.07, - "tps_std": 0.64, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.84, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 64.85, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": 
"results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 16.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 66.76, - "tps_std": 0.43, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 16.83, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": 
"cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 157.95, - "tps_std": 2.63, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 24.53, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 162.19, - "tps_std": 3.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 24.03, - "tps_std": 
0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 157.69, - "tps_std": 2.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 23.89, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 140.32, - "tps_std": 2.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": 
"Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 24.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 153.49, - "tps_std": 1.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 138.49, - "tps_std": 2.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - 
"file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 24.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 152.26, - "tps_std": 2.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - 
"env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 137.52, - "tps_std": 1.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 24.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 107.48, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 8.04, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": 
"results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 107.64, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 7.96, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 85.97, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - 
"test": "tg128", - "tps_mean": 7.38, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 87.05, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 7.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 388.77, - "tps_std": 0.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": 
"results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 50.31, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 412.35, - "tps_std": 1.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 48.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": 
"Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 388.72, - "tps_std": 2.63, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.19, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 301.29, - "tps_std": 0.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.58, - "tps_std": 0.0, - 
"error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 390.07, - "tps_std": 0.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.19, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 300.6, - "tps_std": 2.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": 
"results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 388.99, - "tps_std": 1.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.31, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - 
"env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 302.87, - "tps_std": 0.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 736.95, - "tps_std": 3.72, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 56.89, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 
0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 727.71, - "tps_std": 2.81, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 53.34, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 395.16, - "tps_std": 1.55, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", - "build": { - "hash": "cd6983d5", 
- "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 58.95, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 405.61, - "tps_std": 1.85, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 58.06, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.0, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": 
"rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 150.5, - "tps_std": 1.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 24.55, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 154.09, - "tps_std": 1.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 24.02, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - 
"ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 150.34, - "tps_std": 1.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 134.4, - "tps_std": 1.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log", - 
"build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 24.32, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 146.55, - "tps_std": 1.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_beta", - 
"env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": null - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 145.91, - "tps_std": 1.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - 
"name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 106.99, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 8.03, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 107.1, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": 
"Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 7.98, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 85.5, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 7.42, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": 
true, - "test": "pp512", - "tps_mean": 86.52, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 7.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 223.38, - "tps_std": 0.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 13.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - 
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 229.77, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 13.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 222.86, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", 
- "tps_mean": 13.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 202.13, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 222.67, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": 
"gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 13.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 203.12, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 222.49, - "tps_std": 0.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - 
"name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 13.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 201.47, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - 
"tps_mean": 676.94, - "tps_std": 0.85, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 13.99, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 371.17, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 12.3, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - 
"number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 503.27, - "tps_std": 1.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 13.76, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 495.99, - "tps_std": 2.36, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.61, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - 
"mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": null, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 92.52, - "tps_std": 0.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 4.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 94.54, - "tps_std": 0.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": 
"gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 4.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log", - "build": null - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 83.75, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - 
"name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 91.54, - "tps_std": 0.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 83.61, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - 
"env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 55.68, - "tps_std": 0.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 3.11, - "tps_std": 0.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 83.08, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", - 
"build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": null - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": null - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 135.58, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 
27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 138.61, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": 
"rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 729.91, - "tps_std": 1.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 76.14, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 752.25, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 69.93, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": 
"results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 730.51, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 76.35, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 645.88, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 69.63, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - 
"mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 732.13, - "tps_std": 1.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 76.23, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 652.29, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - 
"tps_mean": 69.62, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 730.59, - "tps_std": 1.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 76.01, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 646.16, - "tps_std": 0.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": 
"rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 69.53, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1614.72, - "tps_std": 4.91, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 84.0, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 942.34, - "tps_std": 1.76, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "build": 
{ - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 57.7, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1527.75, - "tps_std": 3.86, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 85.54, - "tps_std": 0.99, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1489.57, - "tps_std": 4.71, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - 
"file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 80.63, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 355.01, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 33.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 
411.33, - "tps_std": 1.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 33.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 353.36, - "tps_std": 0.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 31.9, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2", - 
"env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 247.95, - "tps_std": 0.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 357.38, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", 
- "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 249.65, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.04, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 356.67, - "tps_std": 0.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.68, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - 
"number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 247.49, - "tps_std": 0.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 448.17, - "tps_std": 1.37, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.39, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": 
"results/gpt-oss-120b-F16__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 498.69, - "tps_std": 2.19, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.06, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 229.59, - "tps_std": 0.74, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.08, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - 
"mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 243.4, - "tps_std": 0.99, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.07, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 353.53, - "tps_std": 0.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - 
"env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 45.05, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 408.5, - "tps_std": 1.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 44.69, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 353.45, - "tps_std": 1.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": 
"results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 44.12, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 246.76, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 43.67, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 
354.82, - "tps_std": 1.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 45.0, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 248.22, - "tps_std": 0.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 44.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - 
} - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 353.2, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 45.15, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 486.9, - "tps_std": 2.23, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - 
"name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 48.08, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 546.41, - "tps_std": 2.88, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 47.25, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - 
"env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 239.72, - "tps_std": 1.23, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 49.01, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 255.17, - "tps_std": 1.65, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 48.93, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": null, - "quant": "MXFP4", - "log": 
"results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 324.54, - "tps_std": 4.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 26.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 380.87, - "tps_std": 8.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 26.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - 
"backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 323.86, - "tps_std": 4.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 26.27, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 257.11, - "tps_std": 2.63, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 26.47, - 
"tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 322.43, - "tps_std": 2.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 26.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 254.08, - "tps_std": 3.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - 
"fa": true, - "test": "tg128", - "tps_mean": 26.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 319.36, - "tps_std": 3.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 26.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 254.87, - "tps_std": 2.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": 
"rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 26.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 369.69, - "tps_std": 1.79, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 8.59, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 389.86, - "tps_std": 2.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": 
"gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 8.58, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 319.09, - "tps_std": 1.46, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 7.79, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 335.15, - "tps_std": 1.8, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", 
- "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 7.79, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": null, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 580.83, - "tps_std": 2.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 64.47, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 649.48, - "tps_std": 3.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, 
- "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 64.18, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 582.89, - "tps_std": 2.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 64.45, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 394.67, - "tps_std": 1.08, - 
"error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 62.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 583.52, - "tps_std": 2.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 64.39, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": 
null, - "fa": true, - "test": "pp512", - "tps_mean": 396.75, - "tps_std": 0.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 62.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 581.83, - "tps_std": 1.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 64.5, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": 
"gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 394.87, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 63.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1205.02, - "tps_std": 7.18, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 68.84, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", - "build": { - 
"hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1472.56, - "tps_std": 14.39, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 67.78, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 648.85, - "tps_std": 6.28, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 69.88, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 
11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 728.38, - "tps_std": 8.17, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 69.8, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 33.47, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 4.62, - 
"tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 34.51, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2-rocwmma", - "env_base": "rocm6_4_2", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 4.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 33.79, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", 
- "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 31.67, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm6_4_2", - "env_base": "rocm6_4_2", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 33.88, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": 
"results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 31.67, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_beta", - "env_base": "rocm7_beta", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 33.91, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", 
- "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 31.66, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - 
"test": "pp512", - "tps_mean": 72.75, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 5.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 73.57, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 5.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { 
- "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 78.99, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 5.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 80.92, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, - "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - }, - { - "model": "llama3.3-70.6B-Q4_K_M", - "model_clean": "llama3.3-70.6B-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 999, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 39.59, - "name_params_b": 70.6, 
- "quant": "Q4_K_M", - "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log", - "build": { - "hash": "cd6983d5", - "number": "6119" - } - } - ] -} \ No newline at end of file