diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ed8ebf5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ \ No newline at end of file diff --git a/benchmark/generate_results.json.py b/benchmark/generate_results.json.py index 1f7061b..996cef5 100644 --- a/benchmark/generate_results.json.py +++ b/benchmark/generate_results.json.py @@ -2,7 +2,10 @@ import re, glob, os, json, time from pathlib import Path -RESULTS_DIR = "results" +RESULT_SOURCES = [ + ("results", False), # regular single-node runs + ("results-rpc", True), # distributed RPC runs across two servers +] OUT_JSON = "../docs/results.json" # --- Regexes --------------------------------------------------------------- @@ -39,23 +42,39 @@ LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE) # --- Helpers --------------------------------------------------------------- +ENV_CANON = { + "rocm7_1": "rocm7.1", +} + def clean_model_name(raw): base = SHARD_RE.sub("", raw) return base +def canonicalize_env(env): + if not env: + return env + for raw, canon in ENV_CANON.items(): + prefix = f"{raw}-" + if env == raw: + return canon + if env.startswith(prefix): + return canon + env[len(raw):] + return env + def parse_env_flags(basename): """ - pattern: __[__fa1][__hblt0][__longctx32768] - Returns (env, fa, context_tag, context_tokens) + pattern: __[__fa1][__hblt0][__longctx32768][__rpc] + Returns (env, fa, context_tag, context_tokens, rpc_flag) """ parts = basename.split("__") if len(parts) < 2: - return None, False, "default", None + return None, False, "default", None, False env = parts[1] fa = False context_tag = "default" context_tokens = None + rpc_flag = False for raw_suffix in parts[2:]: suffix = raw_suffix.lower() @@ -71,8 +90,10 @@ def parse_env_flags(basename): context_tokens = int(m.group(1)) except ValueError: context_tokens = None + elif suffix == "rpc": + rpc_flag = True - return env, fa, context_tag, context_tokens + return env, fa, context_tag, context_tokens, rpc_flag def env_base_and_variant(env): # e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma") @@ -148,111 +169,115 @@ runs = [] builds = set() envs = set() -for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))): - base = os.path.basename(path).rsplit(".log", 1)[0] - if "__" not in base: - continue +for results_dir, is_rpc_source in RESULT_SOURCES: + glob_pattern = os.path.join(results_dir, "*.log") + for path in sorted(glob.glob(glob_pattern)): + base = os.path.basename(path).rsplit(".log", 1)[0] + if "__" not in base: + continue - model_raw, _rest = base.split("__", 1) - env, fa_from_name, context_tag, context_tokens = parse_env_flags(base) - if env: - envs.add(env) + model_raw, _rest = base.split("__", 1) + env, fa_from_name, context_tag, context_tokens, rpc_flag = parse_env_flags(base) + env = canonicalize_env(env) + if env: + envs.add(env) - model_clean = clean_model_name(model_raw) + model_clean = clean_model_name(model_raw) - with open(path, errors="ignore") as f: - text = f.read() + with open(path, errors="ignore") as f: + text = f.read() - # build info (take the last match in file if many) - build_hash, build_num = None, None - for m in BUILD_RE.finditer(text): - build_hash, build_num = m.group(1), m.group(2) - if build_hash: - builds.add((build_hash, build_num)) + # build info (take the last match in file if many) + build_hash, build_num = None, None + for m in BUILD_RE.finditer(text): + build_hash, build_num = m.group(1), m.group(2) + if build_hash: + builds.add((build_hash, build_num)) - # detect error (if there is no valid table rows) - table_rows = parse_table(text) + # detect error (if there is no valid table rows) + table_rows = parse_table(text) - # If table rows exist, we’ll still mark errors only if no perf found - has_pp = any(r.get("test","").lower()=="pp512" for r in table_rows) - has_tg = any(r.get("test","").lower()=="tg128" for r in table_rows) - error, etype = (False, None) - if not (has_pp or has_tg): - error, etype = detect_error(text) + # If table rows exist, we’ll still mark errors only if no perf found + has_pp = any(r.get("test","").lower()=="pp512" for r in table_rows) + has_tg = any(r.get("test","").lower()=="tg128" for r in table_rows) + error, etype = (False, None) + if not (has_pp or has_tg): + error, etype = detect_error(text) - # Determine FA flag: - # prefer explicit column "fa" if present, else fallback to filename "__fa1" - fa_in_table = None - for r in table_rows: - if "fa" in r: - try: - fa_in_table = int(r["fa"]) == 1 - except: - fa_in_table = None - break - fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name + # Determine FA flag: + # prefer explicit column "fa" if present, else fallback to filename "__fa1" + fa_in_table = None + for r in table_rows: + if "fa" in r: + try: + fa_in_table = int(r["fa"]) == 1 + except: + fa_in_table = None + break + fa_enabled = fa_in_table if fa_in_table is not None else fa_from_name - # Normalize env base / variant (e.g., rocwmma) - env_base, env_variant = env_base_and_variant(env) + # Normalize env base / variant (e.g., rocwmma) + env_base, env_variant = env_base_and_variant(env) - # Emit one run per row (pp512 / tg128) - for r in table_rows or [{}]: - test = r.get("test", "").lower() if table_rows else None - tps_mean, tps_std = None, None - if table_rows: - ts_field = r.get("t/s", "") - m = TS_RE.search(ts_field) - if m: - tps_mean = coerce_float(m.group(1)) - tps_std = coerce_float(m.group(2)) + # Emit one run per row (pp512 / tg128) + for r in table_rows or [{}]: + test = r.get("test", "").lower() if table_rows else None + tps_mean, tps_std = None, None + if table_rows: + ts_field = r.get("t/s", "") + m = TS_RE.search(ts_field) + if m: + tps_mean = coerce_float(m.group(1)) + tps_std = coerce_float(m.group(2)) - # parse numeric helpers from row (if present) - params_b = None - file_size_gib = None - if "params" in r: - pm = PARAMS_RE.search(r["params"]) - if pm: - params_b = coerce_float(pm.group(1).replace(",", "")) - if "size" in r: - sm = GIB_RE.search(r["size"]) - if sm: - file_size_gib = coerce_float(sm.group(1).replace(",", "")) + # parse numeric helpers from row (if present) + params_b = None + file_size_gib = None + if "params" in r: + pm = PARAMS_RE.search(r["params"]) + if pm: + params_b = coerce_float(pm.group(1).replace(",", "")) + if "size" in r: + sm = GIB_RE.search(r["size"]) + if sm: + file_size_gib = coerce_float(sm.group(1).replace(",", "")) - # quant from model name (unchanged) - quant = extract_quant(model_clean) + # quant from model name (unchanged) + quant = extract_quant(model_clean) - # name_params_b: prefer table value; else fall back to B in model name - name_params_b = params_b if params_b is not None else b_from_name(model_clean) + # name_params_b: prefer table value; else fall back to B in model name + name_params_b = params_b if params_b is not None else b_from_name(model_clean) - backend = r.get("backend") - ngl = r.get("ngl") - mmap = r.get("mmap") + backend = r.get("backend") + ngl = r.get("ngl") + mmap = r.get("mmap") - run = { - "model": model_raw, - "model_clean": model_clean, - "env": env, - "env_base": env_base, - "env_variant": env_variant, # e.g. "rocwmma" - "fa": bool(fa_enabled), - "context": context_tag or "default", - "context_tokens": context_tokens, - "test": test, # "pp512" | "tg128" | None (if error) - "tps_mean": tps_mean, - "tps_std": tps_std, - "error": bool(error), - "error_type": etype, # "load" | "hang" | "runtime" | None - "backend": backend, - "ngl": (int(ngl) if (ngl and ngl.isdigit()) else None), - "mmap": (int(mmap) if (mmap and mmap.isdigit()) else None), - "params_b": params_b, # from table, if available - "file_size_gib": file_size_gib, # from table, if available - "name_params_b": name_params_b, # parsed from model name (e.g., 30B -> 30.0) - "quant": quant, - "log": path, - "build": {"hash": build_hash, "number": build_num} if build_hash else None, - } - runs.append(run) + run = { + "model": model_raw, + "model_clean": model_clean, + "env": env, + "env_base": env_base, + "env_variant": env_variant, # e.g. "rocwmma" + "fa": bool(fa_enabled), + "context": context_tag or "default", + "context_tokens": context_tokens, + "test": test, # "pp512" | "tg128" | None (if error) + "tps_mean": tps_mean, + "tps_std": tps_std, + "error": bool(error), + "error_type": etype, # "load" | "hang" | "runtime" | None + "backend": backend, + "ngl": (int(ngl) if (ngl and ngl.isdigit()) else None), + "mmap": (int(mmap) if (mmap and mmap.isdigit()) else None), + "params_b": params_b, # from table, if available + "file_size_gib": file_size_gib, # from table, if available + "name_params_b": name_params_b, # parsed from model name (e.g., 30B -> 30.0) + "quant": quant, + "log": path, + "rpc": bool(is_rpc_source or rpc_flag), + "build": {"hash": build_hash, "number": build_num} if build_hash else None, + } + runs.append(run) # Meta meta = { diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log new file mode 100644 index 0000000..574e760 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 64.83 ± 0.23 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.69 ± 0.01 | + +build: caca0d55c (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log new file mode 100644 index 0000000..f1b98aa --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 50.19 ± 0.10 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.73 ± 0.01 | + +build: caca0d55c (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log new file mode 100644 index 0000000..c97790a --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 66.04 ± 0.17 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.73 ± 0.01 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log new file mode 100644 index 0000000..3b34c65 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 50.78 ± 0.06 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.72 ± 0.00 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log new file mode 100644 index 0000000..8446dc2 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 62.70 ± 0.13 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.71 ± 0.01 | + +build: f1840a25d (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log new file mode 100644 index 0000000..ad9c0bd --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 45.01 ± 0.11 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.73 ± 0.01 | + +build: f1840a25d (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log new file mode 100644 index 0000000..07be6a3 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 65.83 ± 0.13 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.72 ± 0.02 | + +build: 677be4d78 (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log new file mode 100644 index 0000000..1ffe290 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 42.96 ± 0.13 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.73 ± 0.01 | + +build: 677be4d78 (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log new file mode 100644 index 0000000..60dac02 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 61.63 ± 0.11 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.92 ± 0.01 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log new file mode 100644 index 0000000..e3e0eb9 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 36.76 ± 0.06 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.93 ± 0.01 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log new file mode 100644 index 0000000..77fd6a7 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 66.33 ± 0.03 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 9.04 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log new file mode 100644 index 0000000..026e9fb --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 30.17 ± 0.09 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 9.05 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__rpc.log new file mode 100644 index 0000000..2f9db40 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__rpc.log @@ -0,0 +1,21 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:858: Remote RPC server crashed or returned malformed response +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fd18621c565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fd18621c92b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fd18621caaf] +/usr/local/lib64/libggml-rpc.so.0(+0xa195) [0x7fd1862ca195] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fd186236de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fd189269650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fd18926b2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fd1892701bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fd18927100e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7fd185bb25b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fd185bb2668] +/usr/local/bin/llama-bench() [0x409255] diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log new file mode 100644 index 0000000..b0df852 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 31.42 ± 0.09 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 9.08 ± 0.01 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log new file mode 100644 index 0000000..b3393bd --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 63.23 ± 0.18 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.70 ± 0.01 | + +build: b447a9a4b (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log new file mode 100644 index 0000000..7e48788 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 30.12 ± 0.09 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.72 ± 0.01 | + +build: b447a9a4b (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log new file mode 100644 index 0000000..5a6310e --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 63.17 ± 0.13 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.72 ± 0.01 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log new file mode 100644 index 0000000..31f1573 --- /dev/null +++ b/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 36.22 ± 0.08 | +| glm4moe 355B.A32B Q4_K - Medium | 189.69 GiB | 356.79 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 8.71 ± 0.01 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log new file mode 100644 index 0000000..843293e --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 172.03 ± 0.73 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.02 ± 0.02 | + +build: caca0d55c (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log new file mode 100644 index 0000000..2f95cfd --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 174.52 ± 1.29 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.94 ± 0.07 | + +build: caca0d55c (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log new file mode 100644 index 0000000..9c52ead --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 171.93 ± 1.16 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.06 ± 0.03 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log new file mode 100644 index 0000000..61eb3e8 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 174.45 ± 1.01 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.02 ± 0.03 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log new file mode 100644 index 0000000..dfdb69f --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 170.89 ± 0.37 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.89 ± 0.08 | + +build: f1840a25d (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log new file mode 100644 index 0000000..fc0a411 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 173.53 ± 1.57 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.92 ± 0.05 | + +build: f1840a25d (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log new file mode 100644 index 0000000..7fb8166 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 172.01 ± 0.91 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.95 ± 0.04 | + +build: 677be4d78 (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log new file mode 100644 index 0000000..f43d260 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 173.90 ± 0.67 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.95 ± 0.03 | + +build: 677be4d78 (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log new file mode 100644 index 0000000..c48d631 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 142.82 ± 0.78 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.85 ± 0.08 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log new file mode 100644 index 0000000..62cf4a2 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 144.47 ± 1.03 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.86 ± 0.03 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log new file mode 100644 index 0000000..0c2dde1 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 143.05 ± 1.08 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.00 ± 0.05 | + +build: 4db63cdde (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log new file mode 100644 index 0000000..bdd2574 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 145.60 ± 1.03 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.94 ± 0.02 | + +build: 4db63cdde (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log new file mode 100644 index 0000000..1e3deba --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 144.59 ± 0.50 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.01 ± 0.03 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log new file mode 100644 index 0000000..a9a4865 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 146.21 ± 2.31 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 18.05 ± 0.10 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log new file mode 100644 index 0000000..0827122 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 170.42 ± 0.65 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.88 ± 0.06 | + +build: b447a9a4b (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log new file mode 100644 index 0000000..1e0b5ce --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 174.42 ± 0.52 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.87 ± 0.12 | + +build: b447a9a4b (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log new file mode 100644 index 0000000..aee1c0d --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 171.34 ± 0.97 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.97 ± 0.01 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log new file mode 100644 index 0000000..0f540e2 --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | pp512 | 173.98 ± 1.06 | +| minimax-m2 230B.A10B Q6_K | 180.94 GiB | 228.69 B | ROCm,RPC | 99 | 1 | 0 | tg128 | 17.94 ± 0.03 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log new file mode 100644 index 0000000..260bc0e --- /dev/null +++ b/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log @@ -0,0 +1,18 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp:690: Remote RPC server crashed or returned malformed response +/lib64/libggml-base.so.0(+0x3565) [0x7f5d2cbe9565] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5d2cbe992b] +/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5d2cbe9aaf] +/lib64/libggml-rpc.so.0(+0x452a) [0x7f5d2fb8252a] +/lib64/libggml-base.so.0(+0x16232) [0x7f5d2cbfc232] +/lib64/libggml-base.so.0(ggml_backend_alloc_ctx_tensors_from_buft+0xff) [0x7f5d2cbfdf1f] +/lib64/libllama.so.0(_ZN11llama_model12load_tensorsER18llama_model_loader+0x3a26) [0x7f5d2fdaad06] +/lib64/libllama.so.0(+0x1cf16) [0x7f5d2fd11f16] +/lib64/libllama.so.0(llama_model_load_from_file+0xac) [0x7f5d2fd12d7c] +/usr/sbin/llama-bench() [0x406d85] +/lib64/libc.so.6(+0x35b5) [0x7f5d2c57f5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5d2c57f668] +/usr/sbin/llama-bench() [0x409255] diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..3ae2157 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 167.68 ± 0.26 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.67 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..57057a1 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.85 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f5f7bd95565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5f7bd9592b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5f7bd95aaf] +/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f5f7f04eeb2] +/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f5f7f054034] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f5f7bdac8ce] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f5f7f70a950] +/usr/local/bin/llama-bench() [0x408242] +/lib64/libc.so.6(+0x35b5) [0x7f5f7b72b5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5f7b72b668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..bfa5aa4 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 170.65 ± 0.11 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.54 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3b6288e --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.57 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.38 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..2f9b66e --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 171.42 ± 0.59 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.69 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..89eb656 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f2015391565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f201539192b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f2015391aaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f2017d0af12] +/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f2017d12a66] +/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f2017d0ffcf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20153abde3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20183de650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20183e02e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20183e51bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20183e600e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7f2014d275b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2014d27668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..63a07c8 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 147.75 ± 0.96 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..bc091bc --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.08 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..9b05d14 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.67 ± 0.37 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.59 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..2e3ff57 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.60 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.34 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..55890b0 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 151.03 ± 0.71 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..b139639 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,28 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f1a5d310565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a5d31092b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a5d310aaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f1a5fc89f12] +/usr/local/lib64/libggml-hip.so.0(+0x28ce0d7) [0x7f1a5fc9b0d7] +/usr/local/lib64/libggml-hip.so.0(+0x28cccd1) [0x7f1a5fc99cd1] +/usr/local/lib64/libggml-hip.so.0(+0x28cb92c) [0x7f1a5fc9892c] +/usr/local/lib64/libggml-hip.so.0(+0x28c645a) [0x7f1a5fc9345a] +/usr/local/lib64/libggml-hip.so.0(+0x28c2f0a) [0x7f1a5fc8ff0a] +/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f1a5fc8efcf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f1a5d32ade3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1a6035d650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f1a6035f2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f1a603641bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f1a6036500e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7f1a5cca65b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a5cca6668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..661ebf4 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.51 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d5ffe77 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.96 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..561ddfe --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.07 ± 0.08 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..5c6dd7f --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.71 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f295ddb7565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f295ddb792b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f295ddb7aaf] +/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f2960686fb2] +/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f296068c004] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f295ddce8ce] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f2960d2e950] +/usr/local/bin/llama-bench() [0x408242] +/lib64/libc.so.6(+0x35b5) [0x7f295d74d5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f295d74d668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..508f69e --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.84 ± 0.31 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8d3bd91 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.86 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.28 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..4f0c03b --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f39038cd565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f39038cd92b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f39038cdaaf] +/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f3906b86eb2] +/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f3906b8eb36] +/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f3906b8c09f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f39038e7de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f3907243650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f39072452e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f390724a1bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f390724b00e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7f39032635b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3903263668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8e5e336 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 152.66 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.29 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..9e8dc16 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 274.07 ± 3.25 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 15.13 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..95168a7 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.44 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f3efb9fa565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3efb9fa92b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3efb9faaaf] +/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f3efe2c9fb2] +/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f3efe2cf004] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3efba118ce] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3efe971950] +/usr/local/bin/llama-bench() [0x408242] +/lib64/libc.so.6(+0x35b5) [0x7f3efb3905b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3efb390668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..a61d3a6 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f20b4ffb565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f20b4ffb92b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f20b4ffbaaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f20b7974f12] +/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f20b797ca66] +/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f20b7979fcf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20b5015de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20b8048650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20b804a2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20b804f1bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20b805000e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7f20b49915b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f20b4991668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6f8c70b --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fe4591ff565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fe4591ff92b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fe4591ffaaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fe45bb78f12] +/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fe45bb80a66] +/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fe45bb7dfcf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fe459219de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fe45c24c650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fe45c24e2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fe45c2531bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fe45c25400e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7fe458b955b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fe458b95668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..8439859 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 271.67 ± 1.52 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.13 ± 0.05 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..97aa215 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.04 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f0845525565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f084552592b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f0845525aaf] +/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f08487deeb2] +/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f08487e4034] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f084553c8ce] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f0848e9a950] +/usr/local/bin/llama-bench() [0x408242] +/lib64/libc.so.6(+0x35b5) [0x7f0844ebb5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0844ebb668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..448f974 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 269.91 ± 0.99 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.11 ± 0.05 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c054b2f --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 107.41 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.67 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..2f93317 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f6a6bb84565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f6a6bb8492b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f6a6bb84aaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f6a6e4fdf12] +/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f6a6e505a66] +/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f6a6e502fcf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f6a6bb9ede3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f6a6ebd1650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f6a6ebd32e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f6a6ebd81bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f6a6ebd900e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x40816d] +/lib64/libc.so.6(+0x35b5) [0x7f6a6b51a5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f6a6b51a668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..0f79dc0 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fa8c83e4565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa8c83e492b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fa8c83e4aaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fa8cad5df12] +/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fa8cad65a66] +/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fa8cad62fcf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fa8c83fede3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa8cb431650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fa8cb4332e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fa8cb4381bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fa8cb43900e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x408087] +/lib64/libc.so.6(+0x35b5) [0x7fa8c7d7a5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa8c7d7a668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..1ecb7ad --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 312.46 ± 3.80 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.50 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..84da9ad --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f68ae79e565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f68ae79e92b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f68ae79eaaf] +/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f68b1a57eb2] +/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f68b1a5fb36] +/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f68b1a5d09f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f68ae7b8de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f68b2114650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f68b21162e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f68b211b1bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f68b211c00e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x407edc] +/lib64/libc.so.6(+0x35b5) [0x7f68ae1345b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f68ae134668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..771396a --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 313.81 ± 0.68 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.48 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..522bff5 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.58 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.31 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..217184c --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 315.62 ± 2.64 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.51 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..0aefcee --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7effceeac565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7effceeac92b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7effceeacaaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7effd1825f12] +/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7effd182da66] +/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7effd182afcf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7effceec6de3] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7effd1ef9650] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7effd1efb2e2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7effd1f001bf] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7effd1f0100e] +/usr/local/bin/llama-bench() [0x40a3db] +/usr/local/bin/llama-bench() [0x408087] +/lib64/libc.so.6(+0x35b5) [0x7effce8425b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7effce842668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..24469f8 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.40 ± 0.48 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.93 ± 0.23 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..af2a816 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.58 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.43 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..88c43ca --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 142.52 ± 0.12 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.05 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..df74048 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,29 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.46 ± 0.00 | +:0:rocdevice.cpp :3588: 50932421658 us: Callback: Queue 0x7f8e6a000000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 +Kernel Name: _ZL18flash_attn_ext_vecILi128ELi1EL9ggml_type1ELS0_1ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiiiiiiiiiiiiiliiliiiiil +VGPU=0x94e06a0 SWq=0x7f8e6cbea000, HWq=0x7f8e6a000000, id=2 + Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0 + grid=[32, 68, 64], workgroup=[32, 4, 1] + private_seg_size=0, group_seg_size=4352 + kernel_obj=0x7f8e6a78f180, kernarg_address=0x0x7f738bd49400 + completion_signal=0x0, correlation_id=0 + rptr=1368490, wptr=1369554 + /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f8e79498565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8e7949892b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f8e79498aaf] +/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f8e7bd67fb2] +/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f8e7bd6d004] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f8e794af8ce] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f8e7c40f950] +/usr/local/bin/llama-bench() [0x408242] +/lib64/libc.so.6(+0x35b5) [0x7f8e78e2e5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8e78e2e668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..8c3fd70 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.69 ± 0.99 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.07 ± 0.05 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1eaf21c --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.47 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.20 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..145d641 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 352.23 ± 9.28 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a4cc1c5 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 192.75 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.17 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..7a1044d --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 345.22 ± 23.61 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.84 ± 0.40 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..55cb214 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 135.26 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f83b9245565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f83b924592b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f83b9245aaf] +/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f83bbb14fb2] +/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f83bbb1a004] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f83b925c8ce] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f83bc1bc950] +/usr/local/bin/llama-bench() [0x408242] +/lib64/libc.so.6(+0x35b5) [0x7f83b8bdb5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f83b8bdb668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..28d5bcc --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 360.93 ± 3.44 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.17 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cd6acc8 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 197.49 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.17 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..787f080 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 579.57 ± 12.23 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.33 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..03c9906 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.50 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.86 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..18d9eef --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 575.31 ± 5.34 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.66 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..63d2bdc --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.86 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.72 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..5ba10ae --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.33 ± 7.18 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.48 ± 0.01 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..7ef2b2f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.69 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.79 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..1242940 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 669.29 ± 4.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.10 ± 0.01 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..126ba4b --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.78 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.71 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..c79d672 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 666.63 ± 5.54 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.62 ± 0.02 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ac4567c --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 148.47 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.94 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..2ffcac8 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 676.38 ± 1.86 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.44 ± 0.02 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e16563c --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.70 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.64 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log new file mode 100644 index 0000000..5a4dab6 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 602.73 ± 3.88 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.21 ± 0.01 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log new file mode 100644 index 0000000..7158506 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.48 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.72 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..73b92db --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 587.21 ± 4.27 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.40 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..14f3269 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 200.93 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.69 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..1e00f5d --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 601.39 ± 7.96 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.54 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..43f023f --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.77 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.38 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..6b73205 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.70 ± 2.25 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.59 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..bc1d0e0 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 148.73 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.39 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log new file mode 100644 index 0000000..bb83bac --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 601.34 ± 1.60 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.45 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..94073cb --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.98 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.64 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..f5ae77f --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.58 ± 4.35 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.38 ± 0.01 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..41df426 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.30 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.69 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..c011dfc --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 652.89 ± 1.70 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.10 ± 0.01 | + +build: caca0d55c (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..67fe066 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 110.83 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.52 ± 0.00 | + +build: caca0d55c (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..ab3540f --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 638.38 ± 7.05 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.12 ± 0.00 | + +build: caca0d55c (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..73034cd --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.95 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 | + +build: caca0d55c (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..db0c91d --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 648.39 ± 23.62 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.52 ± 0.01 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..b2cba3d --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 218.15 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.43 ± 0.00 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..bb24ded --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 640.53 ± 6.75 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.87 ± 0.01 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9ea6ed4 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 207.10 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.58 ± 0.00 | + +build: 86f1f4411 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..5902268 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 650.26 ± 1.03 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.80 ± 0.01 | + +build: f1840a25d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..be9c9f7 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.22 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 | + +build: f1840a25d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..a681ee4 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 634.84 ± 9.56 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.78 ± 0.01 | + +build: f1840a25d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e9be1b9 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.93 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.56 ± 0.00 | + +build: f1840a25d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log new file mode 100644 index 0000000..d9f4189 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 649.99 ± 3.07 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.58 ± 0.01 | + +build: 677be4d78 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..1d5d83e --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.65 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.45 ± 0.00 | + +build: 677be4d78 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..6ccec8d --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 640.61 ± 7.82 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.69 ± 0.01 | + +build: 677be4d78 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c37f41e --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 171.74 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.45 ± 0.00 | + +build: 677be4d78 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log new file mode 100644 index 0000000..c10b3e0 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 648.21 ± 4.33 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.01 | + +build: b447a9a4b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..410a040 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.20 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 | + +build: b447a9a4b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..6c1a5da --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 631.07 ± 4.70 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.89 ± 0.01 | + +build: b447a9a4b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..edbb267 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.72 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.55 ± 0.00 | + +build: b447a9a4b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log new file mode 100644 index 0000000..0c5207d --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 654.79 ± 1.55 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.22 ± 0.01 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log new file mode 100644 index 0000000..3537084 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 237.14 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.14 ± 0.00 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log new file mode 100644 index 0000000..64802b1 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 633.61 ± 5.41 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.67 ± 0.01 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d70d55e --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 221.13 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.49 ± 0.00 | + +build: fa5c85a8b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..c79672b --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1152.51 ± 1.98 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 45.58 ± 0.02 | + +build: ab5783eb4 (7089) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..f4c46b7 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.90 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 19.23 ± 0.00 | + +build: ab5783eb4 (7089) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..9616a45 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 916.61 ± 3.21 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 45.81 ± 0.01 | + +build: 0a3857fe0 (7089) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..bba9e01 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 108.80 ± 0.00 | +| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 25.33 ± 0.00 | + +build: 0a3857fe0 (7089) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..6fc839e --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 799.75 ± 0.53 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.21 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d51e846 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 335.43 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.57 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..20781ba --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 804.75 ± 0.44 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.16 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c48fbb5 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 369.35 ± 42.57 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.04 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..1ce39d9 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 826.54 ± 0.79 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.23 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..b3b65b6 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 371.28 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.58 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..906acce --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 513.70 ± 0.55 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..22fc859 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 115.37 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..64c398a --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 527.23 ± 0.03 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..489bab2 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 95.21 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.09 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..b1e0c7c --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 528.54 ± 0.37 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..af4c4a3 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 199.09 ± 3.44 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log new file mode 100644 index 0000000..64bb317 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log @@ -0,0 +1,4 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..ce8e80c --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2106.39 ± 2.40 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.35 ± 0.02 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..efc4431 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1185.65 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.89 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..4380d71 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2111.57 ± 5.75 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.04 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6120e6e --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1190.60 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.04 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..593579b --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2154.09 ± 4.72 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.41 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..afca8d4 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1291.24 ± 6.88 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.61 ± 0.03 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..485f1c6 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 673.38 ± 9.06 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 37.47 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..bf9bed5 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 332.86 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.37 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..3ef62f3 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 680.04 ± 3.32 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 37.34 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..2c76d4b --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 224.06 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.92 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..b639356 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 674.51 ± 4.94 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 37.42 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..921057f --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 335.42 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.24 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..8bbdcf1 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 675.94 ± 0.23 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.09 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ab47309 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 326.59 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.34 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..4de5343 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 639.77 ± 57.43 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.85 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..448a686 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 224.50 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.84 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..4858f77 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 657.18 ± 7.22 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.14 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..f1126a0 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 336.48 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.80 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..07af172 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1438.05 ± 10.26 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 28.37 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e6da731 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 555.09 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 24.31 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..2c72757 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1450.79 ± 15.08 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 28.27 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..19c4a09 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 357.94 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.86 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..7c7b5b2 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1399.32 ± 6.94 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 28.35 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a04a115 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 556.09 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 24.24 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..a264ea3 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1385.54 ± 24.93 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.72 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..fa44d9e --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 544.55 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.36 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..fcfb2c8 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1419.39 ± 12.17 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.24 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..0696210 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 353.60 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.87 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..bd543ef --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1365.89 ± 19.13 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.49 ± 0.01 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cefe08d --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 456.33 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.03 ± 0.00 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log new file mode 100644 index 0000000..411b668 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1008.52 ± 2.07 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.29 ± 0.02 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e6aeba6 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 53.92 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | + +build: 12bb5c37 (7074) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..d4fe6c7 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1012.09 ± 1.56 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.61 ± 0.01 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8ef5a26 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 46.59 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | + +build: 4db63cdde (7085) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..82a2779 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1017.17 ± 2.70 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.53 ± 0.01 | + +build: 4fc43d43d (7085) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6d86c48 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx32768.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f056f2a5565] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f056f2a592b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f056f2a5aaf] +/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f0571c1ef12] +/usr/local/lib64/libggml-hip.so.0(+0x28c1f64) [0x7f0571c23f64] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f056f2bc8ce] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f05722f1950] +/usr/local/bin/llama-bench() [0x40a3fc] +/usr/local/bin/llama-bench() [0x40816d] +/lib64/libc.so.6(+0x35b5) [0x7f056ec3b5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f056ec3b668] +/usr/local/bin/llama-bench() [0x409255] +✖ ! [rocm-7alpha] llama-2-7b.Q4_0__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/docs/assets/index2.css b/docs/assets/index2.css index b483a53..0f1a98f 100644 --- a/docs/assets/index2.css +++ b/docs/assets/index2.css @@ -149,6 +149,7 @@ select { gap: 6px; font-size: 12px; color: var(--ink); + text-transform: none; } .backend-item input { @@ -161,10 +162,29 @@ select { border-radius: 999px; background: #eef2ff; color: #1d3ea5; - text-transform: uppercase; transform: translateY(-2px); } +.backend-item .tag.tag-hblt0 { + background: #e9edff; + color: #1d3ea5; +} + +.backend-item .tag.tag-rocwmma { + background: #eef9ff; + color: #0a517a; +} + +.backend-item .tag.tag-rocwmma-improved { + background: #faf3ff; + color: #6b1fb7; +} + +.backend-item .tag.tag-improved { + background: #fef9e7; + color: #8a5a00; +} + .stats-box { margin-left: auto; display: flex; @@ -269,6 +289,142 @@ td.model { font-weight: 500; } +td.model .model-head { + display: flex; + align-items: center; + flex-wrap: wrap; + gap: 6px; +} + +.model-pill { + display: inline-flex; + align-items: center; + padding: 2px 8px; + border-radius: 999px; + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.05em; + background: #eceff5; + color: #27303f; + border: 1px solid transparent; +} + +.model-pill-rpc { + background: #fdf2f8; + border-color: #fbcfe8; + color: #9d174d; +} + +.model-pill-rocwmma { + background: #eef9ff; + border-color: #c7e9ff; + color: #0a517a; +} + +.legend { + display: flex; + flex-direction: column; + gap: 6px; + margin-top: 8px; +} + +.legend label { + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.06em; + color: var(--muted); +} + +.legend-pills { + display: flex; + flex-wrap: wrap; + gap: 8px; +} + +.legend-pill { + display: inline-flex; + align-items: center; + gap: 4px; + border-radius: 999px; + border: 1px solid transparent; + background: #e9edff; + color: var(--ink); +} + +.legend-pill-default { + background: #e9edff; + color: var(--ink); +} + +.legend-pill-rpc { + background: #fdf2f8; + border-color: #fbcfe8; + color: #9d174d; +} + +.legend-pill-rocwmma { + background: #eef9ff; + border-color: #c7e9ff; + color: #0a517a; +} + +.legend-pill-rocwmma-improved { + background: #faf3ff; + border-color: #e0c8ff; + color: #6b1fb7; +} + +.modal.hidden { + display: none; +} + +.modal { + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.5); + display: flex; + align-items: center; + justify-content: center; + padding: 20px; + z-index: 1000; +} + +.modal-content { + background: #fff; + border-radius: 12px; + padding: 20px 24px; + max-width: 520px; + width: 100%; + box-shadow: 0 12px 50px rgba(0, 0, 0, 0.2); + position: relative; + font-size: 13px; + line-height: 1.4; +} + +.modal-content h2 { + margin-top: 0; + font-size: 16px; +} + +.modal-content p { + margin: 8px 0; +} + +.modal-close { + position: absolute; + top: 8px; + right: 10px; + border: none; + background: transparent; + font-size: 20px; + cursor: pointer; + color: var(--muted); +} + +.modal-close:hover { + color: var(--ink); +} + .data-cell { white-space: normal; position: relative; @@ -501,3 +657,10 @@ th.backend-header.drop-target { color: var(--muted); margin-top: 4px; } +.modal-content code { + font-family: "JetBrains Mono", "SFMono-Regular", Consolas, monospace; + background: #f6f8fc; + padding: 1px 4px; + border-radius: 4px; + font-size: 12px; +} diff --git a/docs/assets/index2.js b/docs/assets/index2.js index 3837fb3..e8fd973 100644 --- a/docs/assets/index2.js +++ b/docs/assets/index2.js @@ -25,6 +25,7 @@ const state = { document.addEventListener("DOMContentLoaded", async () => { cacheUI(); + setupModals(); try { const res = await fetch("results.json"); const data = await res.json(); @@ -53,9 +54,62 @@ function cacheUI() { stats: document.getElementById("stats-line"), resetBtn: document.getElementById("reset-layout"), tables: document.getElementById("tables"), + hipblasModalOpen: document.getElementById("hipblas-modal-open"), + hipblasModal: document.getElementById("hipblas-modal"), + hipblasModalClose: document.getElementById("hipblas-modal-close"), + rpcModalOpen: document.getElementById("rpc-modal-open"), + rpcModal: document.getElementById("rpc-modal"), + rpcModalClose: document.getElementById("rpc-modal-close"), + rocwmmaModalOpen: document.getElementById("rocwmma-modal-open"), + rocwmmaModal: document.getElementById("rocwmma-modal"), + rocwmmaModalClose: document.getElementById("rocwmma-modal-close"), + rocwmmaImprModalOpen: document.getElementById("rocwmma-impr-modal-open"), + rocwmmaImprModal: document.getElementById("rocwmma-impr-modal"), + rocwmmaImprModalClose: document.getElementById("rocwmma-impr-modal-close"), }; } +function setupModals() { + const modalConfigs = [ + { + open: state.ui.hipblasModalOpen, + modal: state.ui.hipblasModal, + close: state.ui.hipblasModalClose, + }, + { + open: state.ui.rpcModalOpen, + modal: state.ui.rpcModal, + close: state.ui.rpcModalClose, + }, + { + open: state.ui.rocwmmaModalOpen, + modal: state.ui.rocwmmaModal, + close: state.ui.rocwmmaModalClose, + }, + { + open: state.ui.rocwmmaImprModalOpen, + modal: state.ui.rocwmmaImprModal, + close: state.ui.rocwmmaImprModalClose, + }, + ]; + + modalConfigs.forEach(({ open, modal, close }) => { + if (!open || !modal) return; + const openModal = () => modal.classList.remove("hidden"); + const closeModal = () => modal.classList.add("hidden"); + open.addEventListener("click", openModal); + close?.addEventListener("click", closeModal); + modal.addEventListener("click", (e) => { + if (e.target === modal) closeModal(); + }); + document.addEventListener("keydown", (e) => { + if (e.key === "Escape" && !modal.classList.contains("hidden")) { + closeModal(); + } + }); + }); +} + function prepareData(runs) { const contextMap = new Map(); const envSet = new Set(); @@ -132,6 +186,7 @@ function ensureModel(testEntry, modelName, run) { quant: (run.quant || "Unknown").toUpperCase(), sizeB: run.name_params_b ?? run.params_b ?? null, backends: {}, + isRpc: Boolean(run.rpc), search_blob: [modelName, run.quant, run.env, run.test] .filter(Boolean) .map((s) => s.toString().toLowerCase()) @@ -147,6 +202,12 @@ function ensureModel(testEntry, modelName, run) { state.sizeStats.min = Math.min(state.sizeStats.min, row.sizeB); state.sizeStats.max = Math.max(state.sizeStats.max, row.sizeB); } + if (run.rpc) { + row.isRpc = true; + if (!row.search_blob.includes("rpc")) { + row.search_blob = `${row.search_blob} rpc`; + } + } return row; } @@ -259,6 +320,8 @@ function renderBackendList() { const pill = document.createElement("span"); pill.className = "tag"; pill.textContent = tag; + const safeTag = tag.replace(/[^a-z0-9]+/gi, "-").toLowerCase(); + pill.classList.add(`tag-${safeTag}`); label.appendChild(pill); }); @@ -393,7 +456,24 @@ function buildSingleTable(models, backendList) { const tr = document.createElement("tr"); const tdModel = document.createElement("td"); tdModel.className = "model"; - tdModel.innerHTML = `
${model.model}
${model.quant} · ${formatSize(model.sizeB)}
`; + const head = document.createElement("div"); + head.className = "model-head"; + const nameSpan = document.createElement("span"); + nameSpan.className = "model-name"; + nameSpan.textContent = model.model; + head.appendChild(nameSpan); + if (model.isRpc) { + const pill = document.createElement("span"); + pill.className = "model-pill model-pill-rpc"; + pill.title = "Run executed via llama.cpp RPC across two servers"; + pill.textContent = "RPC · dual server"; + head.appendChild(pill); + } + tdModel.appendChild(head); + const meta = document.createElement("div"); + meta.className = "meta"; + meta.textContent = `${model.quant} · ${formatSize(model.sizeB)}`; + tdModel.appendChild(meta); const actionWrap = document.createElement("div"); actionWrap.className = "row-actions"; @@ -586,14 +666,14 @@ function backendValue(entry, direction) { } function splitEnvName(env) { - const parts = env.split(/-(?=rocwmma|improved|hblt0)/g); - if (parts.length === 1) return { base: env, tags: [] }; - const base = parts[0]; - const tags = env - .slice(base.length) - .split("-") - .filter(Boolean) - .map((t) => t.toUpperCase()); + const canonical = env.replace(/_/g, "."); + const tagRegex = /-(rocwmma-improved|rocwmma|improved|hblt0)/gi; + const tags = []; + let match; + while ((match = tagRegex.exec(canonical)) !== null) { + tags.push(match[1].toLowerCase()); + } + const base = canonical.replace(tagRegex, ""); return { base, tags }; } diff --git a/docs/index.html b/docs/index.html index a022c27..5bfab2b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -15,6 +15,23 @@

Fedora 42 · Linux 6.18.0-0.rc5.243.vanilla.fc42.x86_64 · llama.cpp build 1c398dc9e (7034)

Benchmarks captured 14 Nov 2025 · Repo: kyuz0/amd-strix-halo-toolboxes

+
+ +
+ + + + +
+
@@ -68,6 +85,56 @@
+ + + + + + + + diff --git a/docs/results.json b/docs/results.json index 392bd1a..7599ca6 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,8 +1,16 @@ { "meta": { - "generated_at": "2025-11-15T08:24:40Z", + "generated_at": "2025-11-17T22:56:32Z", "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ + { + "hash": "0a3857fe0", + "number": "7089" + }, + { + "hash": "12bb5c37", + "number": "7074" + }, { "hash": "1c398dc9e", "number": "7034" @@ -11,19 +19,58 @@ "hash": "31df4608", "number": "7038" }, + { + "hash": "4db63cdde", + "number": "7085" + }, + { + "hash": "4fc43d43d", + "number": "7085" + }, + { + "hash": "677be4d78", + "number": "7085" + }, + { + "hash": "86f1f4411", + "number": "7085" + }, + { + "hash": "ab5783eb4", + "number": "7089" + }, + { + "hash": "b447a9a4b", + "number": "7085" + }, { "hash": "bca95ca51", "number": "7036" }, + { + "hash": "caca0d55c", + "number": "7085" + }, { "hash": "ee8dd5c65", "number": "7035" + }, + { + "hash": "f1840a25d", + "number": "7085" + }, + { + "hash": "fa5c85a8b", + "number": "7085" } ], "environments": [ "rocm-7alpha", + "rocm-7alpha-hblt0", "rocm-7alpha-rocwmma", + "rocm-7alpha-rocwmma-hblt0", "rocm-7alpha-rocwmma-improved", + "rocm-7alpha-rocwmma-improved-hblt0", "rocm6_4_4", "rocm6_4_4-hblt0", "rocm6_4_4-rocwmma", @@ -32,6 +79,12 @@ "rocm7.1-hblt0", "rocm7.1-rocwmma", "rocm7.1-rocwmma-hblt0", + "rocm7_alpha", + "rocm7_alpha-hblt0", + "rocm7_alpha-rocwmma", + "rocm7_alpha-rocwmma-hblt0", + "rocm7_alpha-rocwmma-improved", + "rocm7_alpha-rocwmma-improved-hblt0", "rocm7_rc", "rocm7_rc-hblt0", "rocm7_rc-rocwmma", @@ -64,6 +117,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -91,6 +145,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -118,6 +173,88 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 167.68, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 22.85, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -142,6 +279,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -169,6 +307,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -196,8 +335,121 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 170.65, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 14.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 1.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", @@ -220,6 +472,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -247,6 +500,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -274,6 +528,88 @@ "name_params_b": null, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 171.42, + "tps_std": 0.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -298,6 +634,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -325,6 +662,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -352,6 +690,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -376,6 +715,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -403,6 +743,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -430,6 +771,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -454,6 +796,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -481,6 +824,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -508,6 +852,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -535,6 +880,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -562,6 +908,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -589,6 +936,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -616,6 +964,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -640,6 +989,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -667,6 +1017,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -694,6 +1045,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -718,6 +1070,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -745,6 +1098,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -772,6 +1126,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -796,6 +1151,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -823,6 +1179,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -850,6 +1207,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -877,6 +1235,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -904,6 +1263,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -931,6 +1291,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -958,6 +1319,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -985,6 +1347,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -1012,6 +1375,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -1039,6 +1403,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -1066,6 +1431,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -1090,6 +1456,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -1117,6 +1484,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -1144,6 +1512,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -1168,6 +1537,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1195,6 +1565,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1222,6 +1593,7 @@ "name_params_b": null, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -1246,6 +1618,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1273,6 +1646,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1300,6 +1674,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1327,6 +1702,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1354,6 +1730,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1381,6 +1758,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1408,6 +1786,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1435,6 +1814,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1462,6 +1842,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1489,6 +1870,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1516,6 +1898,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1543,6 +1926,7 @@ "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1570,6 +1954,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -1597,6 +1982,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -1624,6 +2010,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -1651,11 +2038,124 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 147.75, + "tps_std": 0.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 22.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", @@ -1678,6 +2178,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1705,6 +2206,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1732,8 +2234,121 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 140.67, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 14.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 1.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", @@ -1756,6 +2371,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1783,6 +2399,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1810,6 +2427,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1837,11 +2455,93 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 151.03, + "tps_std": 0.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": null + }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", @@ -1864,6 +2564,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1891,6 +2592,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1918,6 +2620,7 @@ "name_params_b": null, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -1942,6 +2645,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1969,6 +2673,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -1996,6 +2701,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2020,6 +2726,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2047,6 +2754,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2074,6 +2782,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2101,6 +2810,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2128,6 +2838,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2155,6 +2866,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2182,6 +2894,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2206,6 +2919,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -2233,6 +2947,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -2260,6 +2975,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2284,6 +3000,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -2311,6 +3028,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -2338,6 +3056,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2362,6 +3081,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -2389,6 +3109,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -2416,6 +3137,7 @@ "name_params_b": null, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2440,6 +3162,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -2467,6 +3190,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -2494,6 +3218,7 @@ "name_params_b": null, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2518,6 +3243,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -2545,6 +3271,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -2572,6 +3299,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -2599,6 +3327,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -2626,6 +3355,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -2653,6 +3383,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -2680,6 +3411,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2704,6 +3436,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2731,6 +3464,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2758,6 +3492,7 @@ "name_params_b": null, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2782,6 +3517,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2809,6 +3545,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2836,6 +3573,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -2860,6 +3598,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2887,6 +3626,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2914,6 +3654,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2941,6 +3682,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2968,6 +3710,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -2995,6 +3738,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3022,6 +3766,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3049,6 +3794,7 @@ "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3076,6 +3822,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -3103,6 +3850,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -3130,6 +3878,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -3157,11 +3906,124 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 101.51, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 19.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", @@ -3184,6 +4046,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3211,6 +4074,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3238,6 +4102,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3265,11 +4130,93 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 103.07, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 12.71, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": null + }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", @@ -3292,6 +4239,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3319,6 +4267,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3346,6 +4295,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3373,11 +4323,124 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 102.84, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 34.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", @@ -3400,6 +4463,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3427,6 +4491,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3454,6 +4519,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -3478,6 +4544,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3505,6 +4572,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3532,6 +4600,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -3556,6 +4625,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3583,6 +4653,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3610,6 +4681,7 @@ "name_params_b": 70.0, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -3634,6 +4706,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3661,6 +4734,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3688,6 +4762,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3715,6 +4790,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -3742,6 +4818,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3769,6 +4846,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3796,6 +4874,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3823,6 +4902,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3850,6 +4930,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3877,6 +4958,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3904,6 +4986,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3931,6 +5014,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3958,6 +5042,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -3985,6 +5070,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -4012,6 +5098,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -4039,6 +5126,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -4066,6 +5154,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -4093,6 +5182,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -4120,6 +5210,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -4147,6 +5238,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -4174,6 +5266,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -4201,6 +5294,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -4228,6 +5322,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -4252,6 +5347,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -4279,6 +5375,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -4306,6 +5403,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -4333,6 +5431,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -4360,6 +5459,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4387,6 +5487,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4414,6 +5515,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4441,6 +5543,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4468,6 +5571,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4495,6 +5599,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4522,6 +5627,7 @@ "name_params_b": 70.0, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -4546,6 +5652,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4573,6 +5680,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4600,6 +5708,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4627,6 +5736,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4654,6 +5764,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4681,6 +5792,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4708,6 +5820,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4735,6 +5848,7 @@ "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4762,6 +5876,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -4789,6 +5904,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -4816,8 +5932,90 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": null }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 152.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.29, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", @@ -4840,6 +6038,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4867,6 +6066,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4894,6 +6094,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4921,11 +6122,93 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 274.07, + "tps_std": 3.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 109.44, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": null + }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", @@ -4948,6 +6231,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -4975,6 +6259,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5002,6 +6287,57 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -5026,6 +6362,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5053,6 +6390,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5080,6 +6418,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5107,6 +6446,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5134,6 +6474,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5161,6 +6502,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5188,6 +6530,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -5212,6 +6555,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5239,6 +6583,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5266,6 +6611,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5293,6 +6639,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5320,6 +6667,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5347,6 +6695,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5374,6 +6723,7 @@ "name_params_b": 17.0, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -5398,6 +6748,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5425,6 +6776,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5452,6 +6804,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5479,6 +6832,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5506,6 +6860,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5533,6 +6888,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5560,6 +6916,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -5584,6 +6941,7 @@ "name_params_b": 17.0, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": null }, { @@ -5608,6 +6966,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -5632,6 +6991,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5659,6 +7019,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5686,6 +7047,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5713,6 +7075,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -5740,6 +7103,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -5767,6 +7131,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -5794,6 +7159,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -5821,6 +7187,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -5848,6 +7215,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -5875,6 +7243,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -5902,6 +7271,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -5929,6 +7299,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -5956,6 +7327,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -5983,6 +7355,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6010,6 +7383,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6037,6 +7411,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6064,6 +7439,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6091,6 +7467,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6118,6 +7495,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6145,6 +7523,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6172,6 +7551,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6199,6 +7579,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6226,6 +7607,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6253,6 +7635,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6280,6 +7663,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6307,6 +7691,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6334,6 +7719,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6361,6 +7747,7 @@ "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6388,6 +7775,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -6415,6 +7803,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -6442,6 +7831,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -6469,11 +7859,93 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 271.67, + "tps_std": 1.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.13, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 153.04, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": null + }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", @@ -6496,6 +7968,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6523,6 +7996,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6550,6 +8024,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6577,11 +8052,124 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 269.91, + "tps_std": 0.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.11, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 107.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", @@ -6604,6 +8192,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6631,6 +8220,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6658,6 +8248,57 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -6682,6 +8323,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6709,6 +8351,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6736,6 +8379,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -6760,6 +8404,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6787,6 +8432,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6814,6 +8460,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -6838,6 +8485,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6865,6 +8513,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6892,6 +8541,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6919,6 +8569,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6946,6 +8597,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -6973,6 +8625,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7000,6 +8653,7 @@ "name_params_b": 17.0, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -7024,6 +8678,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7051,6 +8706,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7078,6 +8734,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7105,6 +8762,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7132,6 +8790,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7159,6 +8818,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7186,6 +8846,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7213,6 +8874,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7240,6 +8902,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7267,6 +8930,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7294,6 +8958,7 @@ "name_params_b": 17.0, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -7318,6 +8983,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7345,6 +9011,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7372,6 +9039,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7399,6 +9067,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -7426,6 +9095,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -7453,6 +9123,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -7480,6 +9151,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -7504,6 +9176,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -7531,6 +9204,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -7558,6 +9232,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -7585,6 +9260,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -7612,6 +9288,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7639,6 +9316,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7666,6 +9344,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7693,6 +9372,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7720,6 +9400,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7747,6 +9428,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7774,6 +9456,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7801,6 +9484,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7828,6 +9512,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7855,6 +9540,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7882,6 +9568,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7909,6 +9596,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7936,6 +9624,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7963,6 +9652,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -7990,6 +9680,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8017,6 +9708,7 @@ "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8044,6 +9736,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -8071,6 +9764,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -8098,6 +9792,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -8125,11 +9820,93 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 312.46, + "tps_std": 3.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": null + }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", @@ -8152,6 +9929,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8179,6 +9957,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8206,6 +9985,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8233,11 +10013,124 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 313.81, + "tps_std": 0.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 109.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", @@ -8260,6 +10153,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8287,6 +10181,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8314,6 +10209,88 @@ "name_params_b": 17.0, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 315.62, + "tps_std": 2.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -8338,6 +10315,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8365,6 +10343,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8392,6 +10371,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8419,6 +10399,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8446,6 +10427,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8473,6 +10455,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8500,6 +10483,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8527,6 +10511,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8554,6 +10539,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8581,6 +10567,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8608,6 +10595,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8635,6 +10623,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8662,6 +10651,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8689,6 +10679,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8716,6 +10707,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8743,6 +10735,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -8770,6 +10763,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -8797,6 +10791,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -8824,6 +10819,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -8851,6 +10847,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -8878,6 +10875,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -8905,6 +10903,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -8932,6 +10931,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -8959,6 +10959,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -8986,6 +10987,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -9013,6 +11015,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -9040,6 +11043,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -9067,6 +11071,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -9094,6 +11099,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -9121,6 +11127,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -9148,6 +11155,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -9175,6 +11183,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -9202,6 +11211,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -9229,6 +11239,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -9256,6 +11267,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -9283,6 +11295,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -9310,6 +11323,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -9337,6 +11351,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -9364,6 +11379,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -9391,6 +11407,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -9418,6 +11435,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9445,6 +11463,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9472,6 +11491,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9499,6 +11519,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9526,6 +11547,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9553,6 +11575,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9580,6 +11603,7 @@ "name_params_b": 17.0, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -9604,6 +11628,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9631,6 +11656,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9658,6 +11684,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9685,6 +11712,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9712,6 +11740,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9739,6 +11768,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9766,6 +11796,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9793,6 +11824,7 @@ "name_params_b": 107.77, "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9820,6 +11852,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -9847,6 +11880,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -9874,6 +11908,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -9901,11 +11936,124 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 140.4, + "tps_std": 0.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.93, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 49.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", @@ -9928,6 +12076,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9955,6 +12104,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -9982,6 +12132,88 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 142.52, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.13, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 29.46, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -10006,6 +12238,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10033,6 +12266,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10060,6 +12294,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10087,11 +12322,124 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 140.69, + "tps_std": 0.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.07, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 38.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", @@ -10114,6 +12462,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10141,6 +12490,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10168,6 +12518,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -10192,6 +12543,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10219,6 +12571,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10246,6 +12599,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -10270,6 +12624,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10297,6 +12652,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10324,6 +12680,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10351,6 +12708,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10378,6 +12736,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10405,6 +12764,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10432,6 +12792,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10459,6 +12820,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -10486,6 +12848,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10513,6 +12876,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10540,6 +12904,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -10564,6 +12929,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10591,6 +12957,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10618,6 +12985,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10645,6 +13013,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10672,6 +13041,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10699,6 +13069,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10726,6 +13097,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10753,6 +13125,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10780,6 +13153,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10807,6 +13181,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10834,6 +13209,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10861,6 +13237,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -10888,6 +13265,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -10915,6 +13293,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -10942,6 +13321,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -10969,6 +13349,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -10996,6 +13377,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -11023,6 +13405,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -11050,6 +13433,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -11074,6 +13458,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11101,6 +13486,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11128,6 +13514,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11155,6 +13542,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11182,6 +13570,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11209,6 +13598,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11236,6 +13626,7 @@ "name_params_b": 235.0, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -11260,6 +13651,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11287,6 +13679,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11314,6 +13707,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11341,6 +13735,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11368,6 +13763,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11395,6 +13791,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11422,6 +13819,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11449,6 +13847,7 @@ "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11476,6 +13875,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -11503,6 +13903,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -11530,6 +13931,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -11557,11 +13959,124 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 352.23, + "tps_std": 9.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 192.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", @@ -11584,6 +14099,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11611,6 +14127,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11638,6 +14155,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11665,11 +14183,93 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 345.22, + "tps_std": 23.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.84, + "tps_std": 0.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 135.26, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": null + }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", @@ -11692,6 +14292,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11719,6 +14320,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11746,6 +14348,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11773,11 +14376,124 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 360.93, + "tps_std": 3.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 197.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", @@ -11800,6 +14516,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11827,6 +14544,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11854,6 +14572,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11881,6 +14600,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11908,6 +14628,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11935,6 +14656,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11962,6 +14684,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -11989,6 +14712,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12016,6 +14740,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12043,6 +14768,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12070,6 +14796,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12097,6 +14824,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12124,6 +14852,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12151,6 +14880,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12178,6 +14908,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12205,6 +14936,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12232,6 +14964,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12259,6 +14992,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12286,6 +15020,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12313,6 +15048,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12340,6 +15076,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12367,6 +15104,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12394,6 +15132,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12421,6 +15160,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12448,6 +15188,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12475,6 +15216,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12502,6 +15244,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12529,6 +15272,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12556,6 +15300,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12583,6 +15328,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12610,6 +15356,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12637,6 +15384,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -12664,6 +15412,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -12691,6 +15440,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -12718,6 +15468,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -12745,6 +15496,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -12772,6 +15524,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -12799,6 +15552,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -12826,6 +15580,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -12853,6 +15608,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -12880,6 +15636,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12907,6 +15664,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12934,6 +15692,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12961,6 +15720,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -12988,6 +15748,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13015,6 +15776,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13042,6 +15804,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13069,6 +15832,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13096,6 +15860,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13123,6 +15888,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13150,6 +15916,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13177,6 +15944,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13204,6 +15972,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13231,6 +16000,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13258,6 +16028,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13285,6 +16056,7 @@ "name_params_b": 30.53, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13312,6 +16084,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -13339,6 +16112,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -13366,6 +16140,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -13393,11 +16168,124 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 579.57, + "tps_std": 12.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 202.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", @@ -13420,6 +16308,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13447,6 +16336,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13474,6 +16364,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13501,11 +16392,124 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 575.31, + "tps_std": 5.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.66, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 145.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", @@ -13528,6 +16532,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13555,6 +16560,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13582,6 +16588,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13609,11 +16616,124 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 576.33, + "tps_std": 7.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.48, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 160.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", @@ -13636,6 +16756,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13663,6 +16784,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13690,6 +16812,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13717,6 +16840,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13744,6 +16868,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13771,6 +16896,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13798,6 +16924,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13825,6 +16952,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13852,6 +16980,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13879,6 +17008,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13906,6 +17036,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13933,6 +17064,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13960,6 +17092,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -13987,6 +17120,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14014,6 +17148,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14041,6 +17176,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14068,6 +17204,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14095,6 +17232,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14122,6 +17260,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14149,6 +17288,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14176,6 +17316,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14203,6 +17344,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14230,6 +17372,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14257,6 +17400,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14284,6 +17428,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14311,6 +17456,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14338,6 +17484,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14365,6 +17512,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14392,6 +17540,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14419,6 +17568,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14446,6 +17596,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14473,6 +17624,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -14500,6 +17652,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -14527,6 +17680,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -14554,6 +17708,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -14581,6 +17736,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -14608,6 +17764,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -14635,6 +17792,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -14662,6 +17820,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -14689,6 +17848,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -14716,6 +17876,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14743,6 +17904,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14770,6 +17932,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14797,6 +17960,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14824,6 +17988,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14851,6 +18016,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14878,6 +18044,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14905,6 +18072,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14932,6 +18100,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14959,6 +18128,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -14986,6 +18156,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15013,6 +18184,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15040,6 +18212,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15067,6 +18240,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15094,6 +18268,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15121,6 +18296,7 @@ "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15148,6 +18324,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -15175,6 +18352,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -15202,6 +18380,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -15229,11 +18408,124 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 669.29, + "tps_std": 4.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 71.1, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 204.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -15256,6 +18548,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15283,6 +18576,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15310,6 +18604,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15337,11 +18632,124 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 666.63, + "tps_std": 5.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 71.62, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 148.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -15364,6 +18772,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15391,6 +18800,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15418,6 +18828,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15445,11 +18856,124 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 676.38, + "tps_std": 1.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 71.44, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 160.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -15472,6 +18996,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15499,6 +19024,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15526,6 +19052,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15553,6 +19080,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15580,6 +19108,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15607,6 +19136,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15634,6 +19164,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15661,6 +19192,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15688,6 +19220,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15715,6 +19248,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15742,6 +19276,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15769,6 +19304,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15796,6 +19332,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15823,6 +19360,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15850,6 +19388,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15877,6 +19416,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -15904,6 +19444,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -15931,6 +19472,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -15958,6 +19500,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -15985,6 +19528,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16012,6 +19556,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16039,6 +19584,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16066,6 +19612,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16093,6 +19640,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16120,6 +19668,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16147,6 +19696,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16174,6 +19724,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16201,6 +19752,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16228,6 +19780,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16255,6 +19808,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16282,6 +19836,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16309,6 +19864,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -16336,6 +19892,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -16363,6 +19920,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -16390,6 +19948,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -16417,6 +19976,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -16444,6 +20004,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -16471,6 +20032,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -16498,6 +20060,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -16525,6 +20088,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -16552,6 +20116,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16579,6 +20144,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16606,6 +20172,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16633,6 +20200,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16660,6 +20228,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16687,6 +20256,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16714,6 +20284,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16741,6 +20312,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16768,6 +20340,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16795,6 +20368,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16822,6 +20396,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16849,6 +20424,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16876,6 +20452,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16903,6 +20480,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16930,6 +20508,7 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -16957,11 +20536,2252 @@ "name_params_b": 30.53, "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 602.73, + "tps_std": 3.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.21, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 201.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 587.21, + "tps_std": 4.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 200.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 601.39, + "tps_std": 7.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.54, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 145.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 585.7, + "tps_std": 2.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.59, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 148.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 601.34, + "tps_std": 1.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 160.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 585.58, + "tps_std": 4.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.38, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 163.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 652.89, + "tps_std": 1.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.1, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 110.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 638.38, + "tps_std": 7.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 108.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 648.39, + "tps_std": 23.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.52, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 218.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 640.53, + "tps_std": 6.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.87, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 207.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 650.26, + "tps_std": 1.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.8, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 132.22, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 634.84, + "tps_std": 9.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 131.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 649.99, + "tps_std": 3.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log", + "rpc": false, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.58, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log", + "rpc": false, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 166.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 640.61, + "tps_std": 7.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 171.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 648.21, + "tps_std": 4.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 131.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 631.07, + "tps_std": 4.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.89, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 131.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 654.79, + "tps_std": 1.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.22, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 237.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 633.61, + "tps_std": 5.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.67, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 221.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1152.51, + "tps_std": 1.98, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "ab5783eb4", + "number": "7089" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.58, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "ab5783eb4", + "number": "7089" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 71.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "ab5783eb4", + "number": "7089" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "ab5783eb4", + "number": "7089" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 916.61, + "tps_std": 3.21, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "0a3857fe0", + "number": "7089" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "0a3857fe0", + "number": "7089" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 108.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0a3857fe0", + "number": "7089" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 33.51, + "name_params_b": 30.53, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0a3857fe0", + "number": "7089" + } + }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", @@ -16984,6 +22804,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -17011,6 +22832,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -17038,6 +22860,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -17065,11 +22888,124 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 799.75, + "tps_std": 0.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 335.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", @@ -17092,6 +23028,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17119,6 +23056,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17146,6 +23084,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17173,11 +23112,124 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 804.75, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 369.35, + "tps_std": 42.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.04, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", @@ -17200,6 +23252,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17227,6 +23280,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17254,6 +23308,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17281,11 +23336,124 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 826.54, + "tps_std": 0.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 371.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", @@ -17308,6 +23476,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17335,6 +23504,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17362,6 +23532,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17389,6 +23560,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17416,6 +23588,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17443,6 +23616,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17470,6 +23644,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17497,6 +23672,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17524,6 +23700,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17551,6 +23728,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17578,6 +23756,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17605,6 +23784,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17632,6 +23812,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17659,6 +23840,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17686,6 +23868,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17713,6 +23896,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -17740,6 +23924,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17767,6 +23952,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17794,6 +23980,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17821,6 +24008,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17848,6 +24036,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17875,6 +24064,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17902,6 +24092,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17929,6 +24120,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17956,6 +24148,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -17983,6 +24176,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -18010,6 +24204,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -18037,6 +24232,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -18064,6 +24260,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -18091,6 +24288,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -18118,6 +24316,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -18145,6 +24344,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -18172,6 +24372,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -18199,6 +24400,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -18226,6 +24428,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -18253,6 +24456,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -18280,6 +24484,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -18307,6 +24512,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -18334,6 +24540,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -18361,6 +24568,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -18388,6 +24596,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18415,6 +24624,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18442,6 +24652,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18469,6 +24680,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18496,6 +24708,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18523,6 +24736,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18550,6 +24764,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18577,6 +24792,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18604,6 +24820,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18631,6 +24848,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18658,6 +24876,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18685,6 +24904,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18712,6 +24932,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18739,6 +24960,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18766,6 +24988,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18793,6 +25016,7 @@ "name_params_b": 11.77, "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18820,6 +25044,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -18847,6 +25072,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -18874,6 +25100,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -18901,11 +25128,124 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 513.7, + "tps_std": 0.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 115.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", @@ -18928,6 +25268,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18955,6 +25296,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -18982,6 +25324,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19009,11 +25352,124 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 527.23, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 95.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", @@ -19036,6 +25492,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19063,6 +25520,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19090,6 +25548,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19117,11 +25576,124 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 528.54, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 199.09, + "tps_std": 3.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", @@ -19144,6 +25716,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19171,6 +25744,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19198,6 +25772,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19225,6 +25800,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19252,6 +25828,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19279,6 +25856,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19306,6 +25884,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19333,6 +25912,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19360,6 +25940,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19387,6 +25968,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19414,6 +25996,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19441,6 +26024,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19468,6 +26052,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19495,6 +26080,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19522,6 +26108,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19549,6 +26136,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -19576,6 +26164,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19603,6 +26192,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19630,6 +26220,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19657,6 +26248,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19684,6 +26276,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19711,6 +26304,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19738,6 +26332,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19765,6 +26360,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19792,6 +26388,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19819,6 +26416,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19846,6 +26444,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19873,6 +26472,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19900,6 +26500,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19927,6 +26528,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19954,6 +26556,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -19981,6 +26584,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -20008,6 +26612,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -20035,6 +26640,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -20062,6 +26668,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -20089,6 +26696,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -20116,6 +26724,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -20143,6 +26752,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -20170,6 +26780,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -20197,11 +26808,37 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" } }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", + "rpc": false, + "build": null + }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", @@ -20224,6 +26861,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20251,6 +26889,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20278,6 +26917,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20305,6 +26945,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20332,6 +26973,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20359,6 +27001,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20386,6 +27029,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20413,6 +27057,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20440,6 +27085,7 @@ "name_params_b": null, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, "build": null }, { @@ -20464,6 +27110,7 @@ "name_params_b": null, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": null }, { @@ -20488,6 +27135,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20515,6 +27163,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20542,6 +27191,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20569,6 +27219,7 @@ "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20596,6 +27247,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -20623,6 +27275,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -20650,6 +27303,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -20677,11 +27331,124 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2106.39, + "tps_std": 2.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 84.35, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1185.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 59.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", @@ -20704,6 +27471,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20731,6 +27499,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20758,6 +27527,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20785,11 +27555,124 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2111.57, + "tps_std": 5.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 81.04, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1190.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 58.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", @@ -20812,6 +27695,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20839,6 +27723,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20866,6 +27751,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20893,11 +27779,124 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2154.09, + "tps_std": 4.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 84.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1291.24, + "tps_std": 6.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 59.61, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", @@ -20920,6 +27919,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20947,6 +27947,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -20974,6 +27975,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21001,6 +28003,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21028,6 +28031,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21055,6 +28059,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21082,6 +28087,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21109,6 +28115,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21136,6 +28143,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21163,6 +28171,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21190,6 +28199,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21217,6 +28227,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21244,6 +28255,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21271,6 +28283,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21298,6 +28311,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21325,6 +28339,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -21352,6 +28367,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21379,6 +28395,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21406,6 +28423,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21433,6 +28451,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21460,6 +28479,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21487,6 +28507,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21514,6 +28535,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21541,6 +28563,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21568,6 +28591,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21595,6 +28619,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21622,6 +28647,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21649,6 +28675,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21676,6 +28703,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21703,6 +28731,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21730,6 +28759,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21757,6 +28787,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -21784,6 +28815,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -21811,6 +28843,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -21838,6 +28871,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -21865,6 +28899,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -21892,6 +28927,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -21919,6 +28955,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -21946,6 +28983,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -21973,6 +29011,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -22000,6 +29039,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22027,6 +29067,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22054,6 +29095,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22081,6 +29123,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22108,6 +29151,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22135,6 +29179,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22162,6 +29207,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22189,6 +29235,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22216,6 +29263,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22243,6 +29291,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22270,6 +29319,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22297,6 +29347,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22324,6 +29375,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22351,6 +29403,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22378,6 +29431,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22405,6 +29459,7 @@ "name_params_b": 3.88, "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22432,6 +29487,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -22459,6 +29515,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -22486,6 +29543,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -22513,11 +29571,124 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 673.38, + "tps_std": 9.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 332.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", @@ -22540,6 +29711,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22567,6 +29739,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22594,6 +29767,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22621,11 +29795,124 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 680.04, + "tps_std": 3.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 224.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", @@ -22648,6 +29935,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22675,6 +29963,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22702,6 +29991,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22729,11 +30019,124 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 674.51, + "tps_std": 4.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 335.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", @@ -22756,6 +30159,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22783,6 +30187,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22810,6 +30215,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22837,6 +30243,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22864,6 +30271,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22891,6 +30299,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22918,6 +30327,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22945,6 +30355,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22972,6 +30383,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -22999,6 +30411,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23026,6 +30439,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23053,6 +30467,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23080,6 +30495,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23107,6 +30523,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23134,6 +30551,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23161,6 +30579,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23188,6 +30607,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23215,6 +30635,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23242,6 +30663,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23269,6 +30691,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23296,6 +30719,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23323,6 +30747,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23350,6 +30775,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23377,6 +30803,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23404,6 +30831,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23431,6 +30859,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23458,6 +30887,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23485,6 +30915,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23512,6 +30943,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23539,6 +30971,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23566,6 +30999,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23593,6 +31027,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -23620,6 +31055,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -23647,6 +31083,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -23674,6 +31111,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -23701,6 +31139,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -23728,6 +31167,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -23755,6 +31195,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -23782,6 +31223,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -23809,6 +31251,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -23836,6 +31279,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23863,6 +31307,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23890,6 +31335,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23917,6 +31363,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23944,6 +31391,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23971,6 +31419,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -23998,6 +31447,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24025,6 +31475,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24052,6 +31503,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24079,6 +31531,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24106,6 +31559,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24133,6 +31587,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24160,6 +31615,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24187,6 +31643,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24214,6 +31671,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24241,6 +31699,7 @@ "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24268,6 +31727,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -24295,6 +31755,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -24322,6 +31783,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -24349,11 +31811,124 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 675.94, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 52.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 326.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 27.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -24376,6 +31951,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24403,6 +31979,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24430,6 +32007,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24457,11 +32035,124 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 639.77, + "tps_std": 57.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 224.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -24484,6 +32175,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24511,6 +32203,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24538,6 +32231,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24565,11 +32259,124 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 657.18, + "tps_std": 7.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 52.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 336.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 35.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -24592,6 +32399,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24619,6 +32427,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24646,6 +32455,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24673,6 +32483,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24700,6 +32511,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24727,6 +32539,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24754,6 +32567,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24781,6 +32595,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24808,6 +32623,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24835,6 +32651,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24862,6 +32679,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24889,6 +32707,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24916,6 +32735,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24943,6 +32763,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24970,6 +32791,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -24997,6 +32819,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25024,6 +32847,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25051,6 +32875,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25078,6 +32903,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25105,6 +32931,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25132,6 +32959,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25159,6 +32987,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25186,6 +33015,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25213,6 +33043,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25240,6 +33071,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25267,6 +33099,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25294,6 +33127,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25321,6 +33155,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25348,6 +33183,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25375,6 +33211,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25402,6 +33239,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25429,6 +33267,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -25456,6 +33295,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -25483,6 +33323,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -25510,6 +33351,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -25537,6 +33379,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -25564,6 +33407,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -25591,6 +33435,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -25618,6 +33463,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -25645,6 +33491,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -25672,6 +33519,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25699,6 +33547,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25726,6 +33575,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25753,6 +33603,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25780,6 +33631,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25807,6 +33659,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25834,6 +33687,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25861,6 +33715,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25888,6 +33743,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25915,6 +33771,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25942,6 +33799,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25969,6 +33827,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -25996,6 +33855,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26023,6 +33883,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26050,6 +33911,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26077,6 +33939,7 @@ "name_params_b": 116.83, "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26104,6 +33967,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -26131,6 +33995,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -26158,6 +34023,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -26185,11 +34051,124 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1438.05, + "tps_std": 10.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 555.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 24.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "gpt-oss-20b-F32", "model_clean": "gpt-oss-20b-F32", @@ -26212,6 +34191,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26239,6 +34219,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26266,6 +34247,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26293,11 +34275,124 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1450.79, + "tps_std": 15.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 357.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "gpt-oss-20b-F32", "model_clean": "gpt-oss-20b-F32", @@ -26320,6 +34415,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26347,6 +34443,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26374,6 +34471,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26401,11 +34499,124 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1399.32, + "tps_std": 6.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 556.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 24.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "gpt-oss-20b-F32", "model_clean": "gpt-oss-20b-F32", @@ -26428,6 +34639,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26455,6 +34667,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26482,6 +34695,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26509,6 +34723,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26536,6 +34751,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26563,6 +34779,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26590,6 +34807,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26617,6 +34835,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26644,6 +34863,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26671,6 +34891,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26698,6 +34919,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26725,6 +34947,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26752,6 +34975,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26779,6 +35003,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26806,6 +35031,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26833,6 +35059,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -26860,6 +35087,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -26887,6 +35115,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -26914,6 +35143,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -26941,6 +35171,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -26968,6 +35199,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -26995,6 +35227,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27022,6 +35255,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27049,6 +35283,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27076,6 +35311,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27103,6 +35339,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27130,6 +35367,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27157,6 +35395,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27184,6 +35423,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27211,6 +35451,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27238,6 +35479,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27265,6 +35507,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -27292,6 +35535,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -27319,6 +35563,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -27346,6 +35591,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -27373,6 +35619,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -27400,6 +35647,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -27427,6 +35675,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -27454,6 +35703,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -27481,6 +35731,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -27508,6 +35759,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27535,6 +35787,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27562,6 +35815,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27589,6 +35843,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27616,6 +35871,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27643,6 +35899,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27670,6 +35927,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27697,6 +35955,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27724,6 +35983,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27751,6 +36011,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27778,6 +36039,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27805,6 +36067,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27832,6 +36095,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27859,6 +36123,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27886,6 +36151,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27913,6 +36179,7 @@ "name_params_b": 20.91, "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -27940,6 +36207,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -27967,6 +36235,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -27994,6 +36263,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -28021,11 +36291,124 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1385.54, + "tps_std": 24.93, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 544.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -28048,6 +36431,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28075,6 +36459,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28102,6 +36487,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28129,11 +36515,124 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1419.39, + "tps_std": 12.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.24, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 353.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -28156,6 +36655,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28183,6 +36683,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28210,6 +36711,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28237,11 +36739,124 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1365.89, + "tps_std": 19.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.49, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 456.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -28264,6 +36879,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28291,6 +36907,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28318,6 +36935,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28345,6 +36963,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28372,6 +36991,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28399,6 +37019,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28426,6 +37047,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28453,6 +37075,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28480,6 +37103,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28507,6 +37131,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28534,6 +37159,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28561,6 +37187,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28588,6 +37215,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28615,6 +37243,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28642,6 +37271,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28669,6 +37299,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -28696,6 +37327,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28723,6 +37355,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28750,6 +37383,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28777,6 +37411,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28804,6 +37439,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28831,6 +37467,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28858,6 +37495,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28885,6 +37523,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28912,6 +37551,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28939,6 +37579,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28966,6 +37607,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -28993,6 +37635,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -29020,6 +37663,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -29047,6 +37691,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -29074,6 +37719,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -29101,6 +37747,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -29128,6 +37775,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -29155,6 +37803,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -29182,6 +37831,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -29209,6 +37859,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -29236,6 +37887,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -29263,6 +37915,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -29290,6 +37943,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -29317,6 +37971,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -29344,6 +37999,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29371,6 +38027,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29398,6 +38055,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29425,6 +38083,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29452,6 +38111,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29479,6 +38139,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29506,6 +38167,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29533,6 +38195,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29560,6 +38223,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29587,6 +38251,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29614,6 +38279,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29641,6 +38307,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29668,6 +38335,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29695,6 +38363,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29722,6 +38391,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29749,6 +38419,7 @@ "name_params_b": 20.91, "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29776,6 +38447,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -29803,6 +38475,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -29830,6 +38503,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" @@ -29857,11 +38531,124 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "31df4608", "number": "7038" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1008.52, + "tps_std": 2.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.29, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 53.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-improved-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -29884,6 +38671,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29911,6 +38699,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29938,6 +38727,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -29965,11 +38755,124 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1012.09, + "tps_std": 1.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.61, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 46.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -29992,6 +38895,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30019,6 +38923,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30046,6 +38951,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30073,11 +38979,93 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1017.17, + "tps_std": 2.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.53, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": null + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -30100,6 +39088,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30127,6 +39116,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30154,6 +39144,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30181,6 +39172,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30208,6 +39200,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30235,6 +39228,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30262,6 +39256,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30289,6 +39284,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30316,6 +39312,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30343,6 +39340,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30370,6 +39368,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30397,6 +39396,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30424,6 +39424,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30451,6 +39452,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30478,6 +39480,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30505,6 +39508,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -30532,6 +39536,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30559,6 +39564,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30586,6 +39592,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30613,6 +39620,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30640,6 +39648,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30667,6 +39676,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30694,6 +39704,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30721,6 +39732,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30748,6 +39760,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30775,6 +39788,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30802,6 +39816,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30829,6 +39844,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30856,6 +39872,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30883,6 +39900,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30910,6 +39928,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30937,6 +39956,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "ee8dd5c65", "number": "7035" @@ -30964,6 +39984,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -30991,6 +40012,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -31018,6 +40040,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -31045,6 +40068,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -31072,6 +40096,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -31099,6 +40124,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -31126,6 +40152,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -31153,6 +40180,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "bca95ca51", "number": "7036" @@ -31180,6 +40208,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31207,6 +40236,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31234,6 +40264,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31261,6 +40292,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31288,6 +40320,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31315,6 +40348,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31342,6 +40376,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31369,6 +40404,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31396,6 +40432,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31423,6 +40460,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31450,6 +40488,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31477,6 +40516,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31504,6 +40544,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31531,6 +40572,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31558,6 +40600,7 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" @@ -31585,10 +40628,2021 @@ "name_params_b": 6.74, "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", + "rpc": false, "build": { "hash": "1c398dc9e", "number": "7034" } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 64.83, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 50.19, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.73, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 66.04, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.73, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 50.78, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 62.7, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.71, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 45.01, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.73, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 65.83, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.72, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 42.96, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.73, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-rocwmma-improved-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 61.63, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-rocwmma-improved-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.92, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-rocwmma-improved", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 36.76, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-rocwmma-improved", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.93, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-rocwmma-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 66.33, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-rocwmma-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.04, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-rocwmma", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 30.17, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-rocwmma", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.05, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "hblt0", + "fa": false, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__rpc.log", + "rpc": true, + "build": null + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha", + "env_base": "rocm7_alpha", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 31.42, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_alpha", + "env_base": "rocm7_alpha", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.08, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 63.23, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.7, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 30.12, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.72, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 63.17, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.72, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 36.22, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", + "model_clean": "GLM-4.6-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.71, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 356.79, + "file_size_gib": 189.69, + "name_params_b": 356.79, + "quant": "Q4_K_XL", + "log": "results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 172.03, + "tps_std": 0.73, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.02, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 174.52, + "tps_std": 1.29, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.94, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "caca0d55c", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 171.93, + "tps_std": 1.16, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.06, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 174.45, + "tps_std": 1.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.02, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log", + "rpc": true, + "build": { + "hash": "86f1f4411", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 170.89, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.89, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 173.53, + "tps_std": 1.57, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.92, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "f1840a25d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 172.01, + "tps_std": 0.91, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.95, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 173.9, + "tps_std": 0.67, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.95, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log", + "rpc": true, + "build": { + "hash": "677be4d78", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-rocwmma-improved-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 142.82, + "tps_std": 0.78, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-rocwmma-improved-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-improved-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.85, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-rocwmma-improved", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 144.47, + "tps_std": 1.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-rocwmma-improved", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.86, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log", + "rpc": true, + "build": { + "hash": "12bb5c37", + "number": "7074" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-rocwmma-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 143.05, + "tps_std": 1.08, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-rocwmma-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.0, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-rocwmma", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 145.6, + "tps_std": 1.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-rocwmma", + "env_base": "rocm7_alpha", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.94, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "4db63cdde", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 144.59, + "tps_std": 0.5, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha-hblt0", + "env_base": "rocm7_alpha", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.01, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha", + "env_base": "rocm7_alpha", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 146.21, + "tps_std": 2.31, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_alpha", + "env_base": "rocm7_alpha", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.05, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log", + "rpc": true, + "build": { + "hash": "4fc43d43d", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 170.42, + "tps_std": 0.65, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.88, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 174.42, + "tps_std": 0.52, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.87, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log", + "rpc": true, + "build": { + "hash": "b447a9a4b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 171.34, + "tps_std": 0.97, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.97, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 173.98, + "tps_std": 1.06, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.94, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm,RPC", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 180.94, + "name_params_b": 228.69, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log", + "rpc": true, + "build": { + "hash": "fa5c85a8b", + "number": "7085" + } + }, + { + "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log", + "rpc": true, + "build": null } ] } \ No newline at end of file