diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml index 87adb9d..42bf6dd 100644 --- a/.github/workflows/build_and_publish.yml +++ b/.github/workflows/build_and_publish.yml @@ -28,7 +28,7 @@ jobs: IN='${{ inputs.backends }}' if [[ "$IN" == "all" || -z "$IN" ]]; then - JSON='["rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7.1.1","rocm-7.1.1-rocwmma","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7alpha-rocwmma-improved","rocm-7rc","rocm-7rc-rocwmma","vulkan-amdvlk","vulkan-radv"]' + JSON='["rocm-6.4.4","rocm-7.1.1","rocm7-nightlies","vulkan-amdvlk","vulkan-radv"]' else # Remove spaces and build JSON array from comma list IN_CLEAN=$(echo "$IN" | tr -d '[:space:]') diff --git a/.github/workflows/prune-old-toolboxes.yml b/.github/workflows/prune-old-toolboxes.yml index 872dd94..0aa486f 100644 --- a/.github/workflows/prune-old-toolboxes.yml +++ b/.github/workflows/prune-old-toolboxes.yml @@ -44,7 +44,7 @@ jobs: run: | IN='${{ github.event.inputs.backends }}' if [[ "$IN" == "all" || -z "$IN" ]]; then - JSON='["rocm-6.4.2","rocm-6.4.2-rocwmma","rocm-6.4.3","rocm-6.4.3-rocwmma","rocm-6.4.4","rocm-6.4.4-rocwmma","rocm-7.1","rocm-7.1-rocwmma","rocm-7beta","rocm-7alpha","rocm-7alpha-rocwmma","rocm-7alpha-rocwmma-improved","rocm-7rc","rocm-7rc-rocwmma","rocm-7rc-rocwmma-fa_all_quants","vulkan-amdvlk","vulkan-radv"]' + JSON='["rocm-6.4.2","rocm-6.4.3","rocm-6.4.4","rocm-7.1.1","rocm-7beta","rocm7-nightlies","vulkan-amdvlk","vulkan-radv"]' else IN_CLEAN=$(echo "$IN" | tr -d '[:space:]') JSON='["'${IN_CLEAN//,/\",\"}'"]' diff --git a/README.md b/README.md index 80ce59b..2b162c7 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,14 @@ This project provides pre-built containers (“toolboxes”) for running LLMs on **AMD Ryzen AI Max “Strix Halo”** integrated GPUs. Toolbx is the standard developer container system in Fedora (and now works on Ubuntu, openSUSE, Arch, etc). 
+## 🚨 Updates — 2026-01-10 + +- **Simplified Offering**: Removed the `rocwmma` containers, because the standard kernels in newer `llama.cpp` builds are now faster and stable. +- **Renamed**: `rocm-7alpha` is now `rocm7-nightlies` to better reflect that it tracks TheRock nightly builds. +- **Discontinued**: `rocm-7rc` builds are discontinued as they are obsolete. +- **Housekeeping**: Deprecated `rocm-7beta` and other older tags. + + ## 🚨 CRITICAL WARNING — 2026-01-08 **Do NOT use `linux-firmware-20251125`.** It breaks ROCm support on Strix Halo (instability/crashes). @@ -11,7 +19,7 @@ AMD has recalled this update, but if you have already installed it, you must dow ## 🚨 Updates — 2025-11-18 -- Released new toolboxes for ROCm 7 that track the nightly builds, these are now called `alpha`. +- Released new toolboxes for ROCm 7 that track the nightly builds; these are now called `rocm7-nightlies`. - Updated and extended benchmakrs across all llama.cpp backend configurations, and included bennchmarks over RPC (two nodes) and long context (32k) -> [Interactive Benchmark Viewer](https://kyuz0.github.io/amd-strix-halo-toolboxes/) ## Watch the YouTube Video @@ -50,11 +58,11 @@ toolbox create llama-vulkan-radv \ -- --device /dev/dri --group-add video --security-opt seccomp=unconfined ``` -**Command — Create ROCm toolbox (6.4.4/7.1.1/7rc/7alpha)** +**Command — Create ROCm toolbox (6.4.4/7.1.1/rocm7-nightlies)** ```sh -toolbox create llama-rocm-7.1.1-rocwmma \ - --image docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1.1-rocwmma \ +toolbox create llama-rocm-7.1.1 \ + --image docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1.1 \ -- --device /dev/dri --device /dev/kfd \ --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined ``` @@ -112,7 +120,7 @@ llama-cli --no-mmap -ngl 999 -fa 1 -m models/qwen3-coder-30B-A3B/BF16/Qwen3-Code **Command — Refresh specific toolboxes** ```bash -./refresh-toolboxes.sh 
llama-vulkan-radv llama-rocm-7.1.1 ``` ## 1. Llama.cpp Compiled for Every Backend @@ -130,21 +138,13 @@ You can check the containers on DockerHub: https://hub.docker.com/r/kyuz0/amd-st | ------------------------------ | -------------------------------------- | --------------- | | `vulkan-amdvlk` | Vulkan (AMDVLK) | Fastest backend—AMD open-source driver. ≤2 GiB single buffer allocation limit, some large models won't load. | | `vulkan-radv` | Vulkan (Mesa RADV) | Most stable and compatible. Recommended for most users and all models. | -| `rocm-6.4.4` | ROCm 6.4.4 (HIP) + hipBLASLt* | Latest stable build for ROCm 6.4.4, performs very well with most model architectures/quants. | -| `rocm-6.4.4-rocwmma` | ROCm 6.4.4 + ROCWMMA + hipBLASLt* | 6.4.4 with ROCWMMA enabled for better flash attention on RDNA3+/CDNA. | -| `rocm-7.1.1` | ROCm 7.1.1 GA (HIP) + hipBLASLt* | Current GA release for ROCm 7.x; improved scheduler and hipBLASLt kernels. | -| `rocm-7.1.1-rocwmma` | ROCm 7.1.1 GA + ROCWMMA + hipBLASLt* | 7.1.1 with ROCWMMA for maximum flash-attention throughput. | -| `rocm-7rc` | ROCm 7.9 (HIP) + hipBLASLt* | Used to be the release candidate for ROCm 7.9.0 (hence the `rc` tag in the name), now released. | -| `rocm-7rc-rocwmma` | ROCm 7.9 + ROCWMMA + hipBLASLt* | 7.9.0 build with ROCWMMA—useful for early flash-attention validation. | -| `rocm-7alpha` | ROCm 7 Nightly (“7rc-alpha”) + hipBLASLt* | Tracks ROCm 7 nightly (alpha) preview with bleeding-edge patches. | -| `rocm-7alpha-rocwmma` | ROCm 7 Nightly + ROCWMMA + hipBLASLt* | Same nightly/alpha stack with ROCWMMA tuned for flash attention. | -| `rocm-7alpha-rocwmma-improved` | ROCm 7 Nightly + ROCWMMA (improved) + hipBLASLt* | Nightly/Alpha stack plus extra ROCWMMA fixes; fastest but most experimental option. | - -\* All these toolboxes export `ROCBLAS_USE_HIPBLASLT=1` because it historically delivered better performance and stability, altough this might not be the case any more. 
+| `rocm-6.4.4` | ROCm 6.4.4 (HIP) | Latest stable build for ROCm 6.4.4, performs very well with most model architectures/quants. | +| `rocm-7.1.1` | ROCm 7.1.1 GA (HIP) | Current GA release for ROCm 7.x; improved scheduler and kernels. | +| `rocm7-nightlies` | ROCm 7 Nightly | Tracks ROCm 7 nightly builds with bleeding-edge patches. | > These containers are **automatically** rebuilt whenever the Llama.cpp master branch is updated, ensuring you get the latest bug fixes and new model support. The easiest way to update to the newest versions is by running the `refresh-toolboxes.sh` [script below](#211-toolbox-refresh-script-automatic-updates). > -> Legacy images `rocm-6.4.2` and `rocm-6.4.3` are still on Docker Hub for reproducibility but are intentionally excluded from the active list above. Prefer `rocm-6.4.4+` or any `rocm-7.x` tag unless you must bisect an old regression. (The `rocm-7beta` images share the same status.) +> Legacy images `rocm-6.4.2` and `rocm-6.4.3` are still on Docker Hub for reproducibility but are intentionally excluded from the active list above. Prefer `rocm-6.4.4+` or any `rocm-7.x` tag unless you must bisect an old regression. (The `rocm-7beta` and `rocm-7rc` images share the same status.) --- @@ -164,16 +164,16 @@ toolbox create llama-vulkan-radv \ *Only `/dev/dri` is required for Vulkan. 
Make sure your user is in the `video` group.* -#### Command — Create ROCm toolbox (swap the tag for 6.4.4, 7.1, 7rc, 7alpha…) +#### Command — Create ROCm toolbox (swap the tag for 6.4.4, 7.1.1, rocm7-nightlies…) ```sh -toolbox create llama-rocm-7.1-rocwmma \ - --image docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1-rocwmma \ +toolbox create llama-rocm-7.1.1 \ + --image docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1.1 \ -- --device /dev/dri --device /dev/kfd \ --group-add video --group-add render --group-add sudo --security-opt seccomp=unconfined ``` -*ROCm needs both `/dev/dri` and `/dev/kfd`, plus the `video`, `render`, and sometimes `sudo` groups for full compute access. Swap `rocm-7.1-rocwmma` for any other active ROCm tag (6.4.4, 7rc, 7alpha, etc.).* +*ROCm needs both `/dev/dri` and `/dev/kfd`, plus the `video`, `render`, and sometimes `sudo` groups for full compute access. Swap `rocm-7.1.1` for any other active ROCm tag (6.4.4, rocm7-nightlies, etc.).* > **Note:** > @@ -188,7 +188,7 @@ Ubuntu’s `toolbox` package still breaks GPU access, so follow gyhor’s [issue ```sh distrobox create -n llama-rocm-7.1.1 \ - --image docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1.1-rocwmma \ + --image docker.io/kyuz0/amd-strix-halo-toolboxes:rocm-7.1.1 \ --additional-flags "--device /dev/kfd --device /dev/dri --group-add video --group-add render --security-opt seccomp=unconfined" distrobox enter llama-rocm-7.1.1 llama-cli --list-devices @@ -213,7 +213,7 @@ This will: You can also refresh just one or more toolboxes: ```bash -./refresh-toolboxes.sh llama-vulkan-radv llama-rocm-7.1.1-rocwmma +./refresh-toolboxes.sh llama-vulkan-radv llama-rocm-7.1.1 ``` ### 2.2 Running models inside the toolboxes diff --git a/benchmark/compare_hblt0.py b/benchmark/compare_hblt0.py index 094ec94..68bfd3e 100644 --- a/benchmark/compare_hblt0.py +++ b/benchmark/compare_hblt0.py @@ -18,7 +18,7 @@ from pathlib import Path from typing import Dict, Iterable, List, Tuple -DEFAULT_RESULTS = Path("docs") / 
"results.json" +DEFAULT_RESULTS = Path("../docs") / "results.json" # Matches the tolerance used in docs/assets/index2.js (MIN_TOL = 0.25) DEFAULT_TOLERANCE = 0.25 diff --git a/benchmark/delete_transient_failures.py b/benchmark/delete_transient_failures.py deleted file mode 100644 index aa72778..0000000 --- a/benchmark/delete_transient_failures.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import glob -import os -import re - -RESULTS_DIR_DEFAULT = "results" - -# Same detection logic as your extractor -HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE) -SEP_RE = re.compile(r"^\|\s*-+") - -LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE) -HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE) -GENERIC_ERR = re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE) - - -def parse_table(text): - lines = text.splitlines() - rows = [] - header = None - col_idx = {} - - for line in lines: - if HEADER_RE.search(line): - header = [c.strip().lower() for c in line.strip().strip("|").split("|")] - for idx, name in enumerate(header): - col_idx[name] = idx - continue - - if header and (SEP_RE.search(line) or not line.strip()): - continue - - if header and line.startswith("|"): - parts = [c.strip() for c in line.strip().strip("|").split("|")] - if len(parts) < len(header): - continue - row = {} - for name, idx in col_idx.items(): - row[name] = parts[idx] - rows.append(row) - - if header and line.strip() == "" and rows: - break - - return rows - - -def detect_error(text): - if LOAD_ERR.search(text): - return True - if HANG_ERR.search(text): - return True - if GENERIC_ERR.search(text): - return True - return False - - -def is_non_transient_vram_issue(text): - # Do NOT delete logs with this kind of Vulkan OOM - return ( - "ggml_vulkan: Device memory allocation of size" in text - and "Requested buffer size exceeds device buffer size limit" in text - ) - - -def 
is_failed_run(text): - table_rows = parse_table(text) - - has_pp = any(r.get("test", "").lower() == "pp512" for r in table_rows) - has_tg = any(r.get("test", "").lower() == "tg128" for r in table_rows) - - if has_pp or has_tg: - return False - - return detect_error(text) - - -def main(): - ap = argparse.ArgumentParser( - description="Delete transient-failure benchmark logs in results/" - ) - ap.add_argument( - "--results-dir", - default=RESULTS_DIR_DEFAULT, - help="Directory containing *.log files (default: results)", - ) - ap.add_argument( - "--dry-run", - action="store_true", - help="Only print what would be deleted", - ) - args = ap.parse_args() - - results_dir = args.results_dir - pattern = os.path.join(results_dir, "*.log") - - to_delete = [] - skipped_non_transient = [] - - for path in sorted(glob.glob(pattern)): - try: - with open(path, errors="ignore") as f: - text = f.read() - except OSError as e: - print(f"Could not read {path}: {e}") - continue - - if not is_failed_run(text): - continue - - if is_non_transient_vram_issue(text): - skipped_non_transient.append(path) - continue - - to_delete.append(path) - - if not to_delete and not skipped_non_transient: - print("No failed logs found.") - return - - if skipped_non_transient: - print("Keeping logs with non transient VRAM issues:") - for p in skipped_non_transient: - print(f" KEEP {p}") - - if to_delete: - print("Deleting logs with transient failures:") - for p in to_delete: - print(f" DELETE {p}") - if not args.dry_run: - try: - os.remove(p) - except OSError as e: - print(f" Failed to delete {p}: {e}") - else: - print("No logs to delete.") - - -if __name__ == "__main__": - main() diff --git a/benchmark/generate_markdown_results.py b/benchmark/generate_markdown_results.py deleted file mode 100644 index c9e31e6..0000000 --- a/benchmark/generate_markdown_results.py +++ /dev/null @@ -1,571 +0,0 @@ -#!/usr/bin/env python3 -""" -gen_benchmarks_md.py — Generate Markdown for README + detailed benchmarks from 
results.json - -Defaults: -- Input JSON: ../docs/results.json -- Outputs: ./README_benchmarks_section.md and ./benchmarks_generated.md -""" - -from __future__ import annotations -import json -import argparse -import statistics as stats -from pathlib import Path -from collections import defaultdict -from typing import Dict, List, Tuple, Optional - -# === ENV LABELS === -ENV_LABEL: Dict[str, str] = { - # ROCm 7 RC - "rocm7_rc-rocwmma": "ROCm 7 RC + ROCWMMA + hipBLASLt", - "rocm7_rc": "ROCm 7 RC (hipBLASLt)", - "rocm7_rc-hblt0": "ROCm 7 RC (hipBLASLt OFF)", - "rocm7_rc-rocwmma-hblt0": "ROCm 7 RC + ROCWMMA (hipBLASLt OFF)", - - # ROCm 6.4.4 - "rocm6_4_4": "ROCm 6.4.4 (hipBLASLt)", - "rocm6_4_4-hblt0": "ROCm 6.4.4 (hipBLASLt OFF)", - "rocm6_4_4-rocwmma": "ROCm 6.4.4 + ROCWMMA (hipBLASLt)", - "rocm6_4_4-rocwmma-hblt0": "ROCm 6.4.4 + ROCWMMA (hipBLASLt OFF)", - - # Vulkan - "vulkan_amdvlk": "Vulkan AMDVLK", - "vulkan_radv": "Vulkan RADV", -} - -TESTS = ["pp512", "tg128"] - -def md_row(values: List[str]) -> str: - return "| " + " | ".join(values) + " |" - - -def load_results(path: Path) -> Dict: - data = json.loads(path.read_text()) - assert "runs" in data and isinstance(data["runs"], list), "results.json must have a top-level 'runs' list" - return data - - -def envs_present(runs: List[Dict], only_env: Optional[List[str]], include_all_envs: bool) -> List[str]: - present = {r.get("env") for r in runs if r.get("env")} - if only_env: - present = present.intersection(set(only_env)) - if include_all_envs: - # Include even if not present (might appear 0 rows in tables) - envs = [e for e in ENV_LABEL.keys() if (not only_env or e in only_env)] - else: - envs = [e for e in ENV_LABEL.keys() if e in present and (not only_env or e in only_env)] - return envs - - -def fa_to_filter(fa: str) -> Optional[bool]: - fa = fa.lower().strip() - if fa == "on": - return True - if fa == "off": - return False - if fa == "any": - return None - raise ValueError("--fa must be on/off/any") - - -def 
margin_aware_placements( - runs: List[Dict], - envs: List[str], - test_filter: str, - fa_filter: Optional[bool] -) -> Tuple[Dict[str, Dict[str, int]], int]: - """ - Returns (placements, sample_count) - placements[env] -> {"first": n, "second": n, "third": n} - sample_count = number of model+quant comparisons considered - """ - placements = defaultdict(lambda: {"first": 0, "second": 0, "third": 0}) - # group by (model, quant) - grouped = defaultdict(list) - for r in runs: - if r.get("error"): - continue - if r.get("test") != test_filter: - continue - if fa_filter is not None and r.get("fa") != fa_filter: - continue - if r.get("env") not in envs: - continue - key = (r.get("model_clean"), r.get("quant")) - grouped[key].append(r) - - samples = 0 - for key, entries in grouped.items(): - # collate by env - env_groups = defaultdict(list) - for e in entries: - env_groups[e["env"]].append(e) - env_list = [e for e in envs if e in env_groups] # keep requested order - if len(env_list) < 2: - continue - - # summarize median mean ± median err per env - summary = {} - for env in env_list: - means = [x["tps_mean"] for x in env_groups[env] if x.get("tps_mean") is not None] - errs = [x.get("tps_err", 0.0) or 0.0 for x in env_groups[env]] - if not means: - continue - m = stats.median(means) - e = stats.median(errs) if errs else 0.0 - summary[env] = (m - e, m + e, m) - if len(summary) < 2: - continue - - samples += 1 - - # rank with overlap -> ties share rank - remaining = [env for env, _ in sorted(summary.items(), key=lambda kv: kv[1][2], reverse=True)] - assigned = {} - current_rank = 1 - while remaining and current_rank <= 3: - env0 = remaining[0] - low0, high0, _ = summary[env0] - tied = [env0] - for env in remaining[1:]: - low, high, _ = summary[env] - if not (low > high0 or high < low0): # overlap -> tie - tied.append(env) - for env in tied: - assigned[env] = current_rank - remaining = [e for e in remaining if e not in tied] - current_rank += 1 - - for env, rk in 
assigned.items(): - if rk == 1: - placements[env]["first"] += 1 - elif rk == 2: - placements[env]["second"] += 1 - elif rk == 3: - placements[env]["third"] += 1 - - return placements, samples - - -def pairwise_win_counts(runs: List[Dict], envA: str, envB: str, test: str, fa_filter: Optional[bool]) -> Tuple[int, int, int, int]: - A = {} - B = {} - for r in runs: - if r.get("error") or r.get("test") != test: - continue - if fa_filter is not None and r.get("fa") != fa_filter: - continue - key = (r.get("model_clean"), r.get("quant")) - if r.get("env") == envA: - A[key] = r["tps_mean"] - elif r.get("env") == envB: - B[key] = r["tps_mean"] - winsA = winsB = ties = 0 - for k in (set(A) & set(B)): - if A[k] > B[k]: - winsA += 1 - elif B[k] > A[k]: - winsB += 1 - else: - ties += 1 - total = winsA + winsB + ties - return winsA, winsB, ties, total - - -def average_ranks(place_dict: Dict[str, Dict[str, int]]) -> Dict[str, Optional[float]]: - avg = {} - for env, c in place_dict.items(): - total = c.get("first", 0) + c.get("second", 0) + c.get("third", 0) - if total == 0: - avg[env] = None - else: - avg[env] = round((1 * c.get("first", 0) + 2 * c.get("second", 0) + 3 * c.get("third", 0)) / total, 2) - return avg - - -def flash_attention_effect(runs: List[Dict], envs: List[str]) -> Dict[str, Dict[str, Dict[str, float]]]: - """ - Returns: effects[env][test] = {n_pairs, median_pct, min, max} - Based on paired model+quant runs (ON vs OFF). 
- """ - model_pairs = defaultdict(lambda: defaultdict(dict)) # (env,test)->(model,quant)->{fa: tps} - for r in runs: - if r.get("error") or r.get("tps_mean") is None: - continue - if r.get("test") not in TESTS: - continue - if r.get("env") not in envs: - continue - model_key = (r.get("model_clean"), r.get("quant")) - model_pairs[(r["env"], r["test"])][model_key][r.get("fa")] = r["tps_mean"] - - summary = defaultdict(dict) - for (env, test), d in model_pairs.items(): - deltas = [] - for mk, vals in d.items(): - if True in vals and False in vals and vals[False] > 0: - deltas.append((vals[True] - vals[False]) / vals[False] * 100.0) - if deltas: - summary[env][test] = { - "n_pairs": len(deltas), - "median_pct": round(stats.median(deltas), 1), - "min": round(min(deltas), 1), - "max": round(max(deltas), 1), - } - return summary - - -def rocwmma_effect(runs: List[Dict], pairs_to_compare: List[Tuple[str, str, str]], tests: List[str]) -> List[Tuple[str, str, str, str, int, float]]: - """ - Compare ROCWMMA ON vs OFF with same hipBLASLt state. - Returns rows of (context_label, test, env_on, env_off, n_pairs, median_delta_pct) - where delta_pct = median(ON/OFF - 1)*100 over common model+quant. 
- """ - rows = [] - for env_on, env_off, label in pairs_to_compare: - for test in tests: - data_on = defaultdict(list) - data_off = defaultdict(list) - for r in runs: - if r.get("error") or r.get("test") != test: - continue - if r.get("env") == env_on: - data_on[(r.get("model_clean"), r.get("quant"))].append(r["tps_mean"]) - elif r.get("env") == env_off: - data_off[(r.get("model_clean"), r.get("quant"))].append(r["tps_mean"]) - common = sorted(set(data_on) & set(data_off)) - if not common: - continue - ratios = [] - for k in common: - aon = stats.median(data_on[k]) - aoff = stats.median(data_off[k]) - if aoff > 0: - ratios.append(aon / aoff - 1.0) - if ratios: - rows.append((label, test, env_on, env_off, len(ratios), round(100 * stats.median(ratios), 1))) - return rows - - -def hipblaslt_effect(runs: List[Dict], pairs_to_compare: List[Tuple[str, str, str]], tests: List[str]) -> List[Tuple[str, str, str, str, int, float]]: - """ - Compare hipBLASLt ON vs OFF with same ROCWMMA state. - Returns rows of (context_label, test, env_on, env_off, n_pairs, median_delta_pct) - where delta_pct = median(ON/OFF - 1)*100 over common model+quant. 
- """ - rows = [] - for env_on, env_off, label in pairs_to_compare: - for test in tests: - data_on = defaultdict(list) - data_off = defaultdict(list) - for r in runs: - if r.get("error") or r.get("test") != test: - continue - if r.get("env") == env_on: - data_on[(r.get("model_clean"), r.get("quant"))].append(r["tps_mean"]) - elif r.get("env") == env_off: - data_off[(r.get("model_clean"), r.get("quant"))].append(r["tps_mean"]) - common = sorted(set(data_on) & set(data_off)) - if not common: - continue - ratios = [] - for k in common: - aon = stats.median(data_on[k]) - aoff = stats.median(data_off[k]) - if aoff > 0: - ratios.append(aon / aoff - 1.0) - if ratios: - rows.append((label, test, env_on, env_off, len(ratios), round(100 * stats.median(ratios), 1))) - return rows - - -def amdvlk_vs_radv(runs: List[Dict], fa_filter: Optional[bool]) -> List[Tuple[str, int, int, int, int]]: - rows = [] - for test in TESTS: - wa, wr, ties, total = pairwise_win_counts(runs, "vulkan_amdvlk", "vulkan_radv", test, fa_filter) - rows.append((test, wa, wr, ties, total)) - return rows - - -def winners(place_dict: Dict[str, Dict[str, int]], slot="first") -> Tuple[List[str], int]: - max_count = max((c.get(slot, 0) for c in place_dict.values()), default=0) - win_list = [env for env, c in place_dict.items() if c.get(slot, 0) == max_count and max_count > 0] - return win_list, max_count - - -def human_list(envs: List[str]) -> str: - return ", ".join(ENV_LABEL.get(e, e) for e in envs) if envs else "—" - - -def build_readme_section( - envs: List[str], - pp_place: Dict[str, Dict[str, int]], - tg_place: Dict[str, Dict[str, int]], - fa_filter: Optional[bool] -) -> str: - # Winners - pp_wins, _ = winners(pp_place, "first") - tg_wins, _ = winners(tg_place, "first") - - lines: List[str] = [] - lines.append("## 3. 
Performance Benchmarks (Key Results)") - lines.append("") - lines.append("🌐 Interactive exploration of the latest benchmark runs: [Interactie Benchmark Viewer](https://kyuz0.github.io/amd-strix-halo-toolboxes/)") - lines.append("") - lines.append("Benchmarks were analysed with **error-aware ties** (mean ± σ). If two backends overlap within margins, they are treated as a tie. All placement counts below use **Flash Attention ON**.") - lines.append("") - - # Placement tables - def place_table(title: str, place_dict: Dict[str, Dict[str, int]]): - lines.append(f"**{title}**") - lines.append(md_row(["Backend", "1st", "2nd", "3rd"])) - lines.append(md_row(["---", "---:", "---:", "---:"])) - order = sorted(place_dict.items(), key=lambda kv: (-kv[1].get("first", 0), -kv[1].get("second", 0), kv[0])) - for env, c in order: - lines.append(md_row([ENV_LABEL.get(env, env), str(c.get("first", 0)), str(c.get("second", 0)), str(c.get("third", 0))])) - lines.append("") - - place_table("Prompt Processing (pp512)", pp_place) - place_table("Token Generation (tg128)", tg_place) - - # Data-driven recommendations - def total_score(c: Dict[str, int]) -> int: - # weight 1st more than 2nd - return c.get("first", 0) * 2 + c.get("second", 0) - - best_bal_score = -1 - balanced: List[str] = [] - for env in envs: - score = total_score(pp_place.get(env, {})) + total_score(tg_place.get(env, {})) - if score > best_bal_score: - best_bal_score = score - balanced = [env] - elif score == best_bal_score: - balanced.append(env) - - lines.append("### Summary & Recommendations") - lines.append(f"- **Fastest prompt processing:** {human_list(pp_wins)} (most 1st-place finishes).") - lines.append(f"- **Fastest token generation:** {human_list(tg_wins)} (most 1st-place finishes).") - lines.append(f"- **Balanced choice:** {human_list(balanced)} (consistently near the top across PP/TG).") - lines.append("") - lines.append("> **Note (ROCm 7):** Toolboxes enable **hipBLASLt** by default. 
The benchmark suite also runs **hipBLASLt OFF** variants to show its impact.") - return "\n".join(lines) - - -def build_benchmarks_doc( - runs: List[Dict], - envs: List[str], - pp_place: Dict[str, Dict[str, int]], - tg_place: Dict[str, Dict[str, int]], - fa_filter: Optional[bool], -) -> str: - lines: List[str] = [] - lines.append("# AMD Strix Halo — llama.cpp Toolboxes (Benchmarks)") - lines.append("") - lines.append("**Interactive results:** https://kyuz0.github.io/amd-strix-halo-toolboxes/") - lines.append("") - lines.append("## Table of Contents") - lines.append("- [Benchmark methodology](#benchmark-methodology)") - lines.append("- [Summary of current dataset (Flash Attention ON)](#summary-of-current-dataset-flash-attention-on)") - lines.append(" - [Placement counts](#placement-counts)") - lines.append(" - [Pairwise head-to-head wins](#pairwise-head-to-head-wins)") - lines.append(" - [Average ranks](#average-ranks)") - lines.append("- [Analyses by feature](#analyses-by-feature)") - lines.append(" - [Impact of Flash Attention](#impact-of-flash-attention)") - lines.append(" - [Impact of ROCWMMA](#impact-of-rocwmma)") - lines.append(" - [Impact of hipBLASLt](#impact-of-hipblaslt)") - lines.append(" - [Vulkan: AMDVLK vs RADV](#vulkan-amdvlk-vs-radv)") - lines.append("- [Recommendations](#recommendations)") - lines.append("- [Winner calculation](#winner-calculation)") - lines.append("") - lines.append("---") - lines.append("") - lines.append("## Benchmark methodology") - lines.append("") - lines.append("- **pp512** — prompt processing throughput (tokens/sec, prefill)") - lines.append("- **tg128** — token generation throughput (tokens/sec, interactive)") - lines.append("- Each backend tested twice per model: `-fa 0` and `-fa 1`") - lines.append("- Winners per model/test are **margin-aware**; multiple winners are possible when mean±σ overlap") - lines.append("- Built from the same llama.cpp commit for consistency") - lines.append("") - lines.append("**Backends in this 
dataset:** " + ", ".join(ENV_LABEL.get(e, e) for e in envs)) - lines.append("") - lines.append("**ROCm 7 hipBLASLt policy:** Toolboxes ship with **hipBLASLt enabled** by default (`ROCBLAS_USE_HIPBLASLT=1`). The benchmark script also runs **hipBLASLt OFF** variants (`-hblt0`) to measure its effect.") - lines.append("") - lines.append("---") - lines.append("") - lines.append("## Summary of current dataset (Flash Attention ON)") - lines.append("") - # Placement counts - lines.append("### Placement counts") - def place_block(title: str, place_dict: Dict[str, Dict[str, int]]): - lines.append(f"**{title}**") - lines.append(md_row(["Backend", "1st", "2nd", "3rd"])) - lines.append(md_row(["---", "---:", "---:", "---:"])) - order = sorted(place_dict.items(), key=lambda kv: (-kv[1].get("first", 0), -kv[1].get("second", 0), kv[0])) - for env, c in order: - lines.append(md_row([ENV_LABEL.get(env, env), str(c.get("first", 0)), str(c.get("second", 0)), str(c.get("third", 0))])) - lines.append("") - place_block("Prompt Processing (pp512)", pp_place) - place_block("Token Generation (tg128)", tg_place) - - # Pairwise wins - lines.append("### Pairwise head-to-head wins") - lines.append("For any model+quant where both backends succeeded, this counts who was faster (ties when equal).") - lines.append(md_row(["Comparison", "Test", "A wins", "B wins", "Ties", "Total"])) - lines.append(md_row(["---", "---", "---:", "---:", "---:", "---:"])) - pairs = [ - ("ROCm 7 RC + ROCWMMA + hipBLASLt", "Vulkan AMDVLK", "rocm7_rc-rocwmma", "vulkan_amdvlk"), - ("ROCm 7 RC + ROCWMMA + hipBLASLt", "Vulkan RADV", "rocm7_rc-rocwmma", "vulkan_radv"), - ("Vulkan AMDVLK", "Vulkan RADV", "vulkan_amdvlk", "vulkan_radv"), - ] - for labelA, labelB, envA, envB in pairs: - for test in TESTS: - a, b, t, total = pairwise_win_counts(runs, envA, envB, test, fa_filter) - lines.append(md_row([f"{labelA} vs {labelB}", test, str(a), str(b), str(t), str(total)])) - lines.append("") - - # Average ranks - lines.append("### 
Average ranks") - avg_pp = average_ranks(pp_place) - avg_tg = average_ranks(tg_place) - lines.append("**Prompt Processing (pp512)**") - lines.append(md_row(["Backend", "Avg Rank (↓ is better)"])) - lines.append(md_row(["---", "---:"])) - for env, val in sorted(avg_pp.items(), key=lambda kv: (kv[1] is None, kv[1] or 99)): - lines.append(md_row([ENV_LABEL.get(env, env), str(val) if val is not None else "—"])) - lines.append("") - lines.append("**Token Generation (tg128)**") - lines.append(md_row(["Backend", "Avg Rank (↓ is better)"])) - lines.append(md_row(["---", "---:"])) - for env, val in sorted(avg_tg.items(), key=lambda kv: (kv[1] is None, kv[1] or 99)): - lines.append(md_row([ENV_LABEL.get(env, env), str(val) if val is not None else "—"])) - lines.append("") - lines.append("---") - lines.append("") - lines.append("## Analyses by feature") - lines.append("") - - # Flash Attention effect - lines.append("### Impact of Flash Attention") - fa_eff = flash_attention_effect(runs, envs) - lines.append("Median % change when **Flash Attention ON vs OFF**, paired by model+quant, per backend:") - lines.append(md_row(["Backend", "pp512 Δ% (median, min..max, n)", "tg128 Δ% (median, min..max, n)"])) - lines.append(md_row(["---", "---", "---"])) - def fmt_eff(row: Optional[Dict[str, float]]) -> str: - return f"{row['median_pct']}% ({row['min']}..{row['max']}), n={row['n_pairs']}" if row else "—" - for env in envs: - row_pp = fa_eff.get(env, {}).get("pp512") - row_tg = fa_eff.get(env, {}).get("tg128") - lines.append(md_row([ENV_LABEL.get(env, env), fmt_eff(row_pp), fmt_eff(row_tg)])) - lines.append("") - - # ROCWMMA effect — check both ROCm 7 and 6.4.4 families if present - lines.append("### Impact of ROCWMMA") - rocwmma_pairs = [] - if "rocm7_rc-rocwmma" in envs and "rocm7_rc" in envs: - rocwmma_pairs.append(("rocm7_rc-rocwmma", "rocm7_rc", "ROCm 7 RC (hipBLASLt)")) - if "rocm7_rc-rocwmma-hblt0" in envs and "rocm7_rc-hblt0" in envs: - 
rocwmma_pairs.append(("rocm7_rc-rocwmma-hblt0", "rocm7_rc-hblt0", "ROCm 7 RC (hipBLASLt OFF)")) - if "rocm6_4_4-rocwmma" in envs and "rocm6_4_4" in envs: - rocwmma_pairs.append(("rocm6_4_4-rocwmma", "rocm6_4_4", "ROCm 6.4.4 (hipBLASLt)")) - if "rocm6_4_4-rocwmma-hblt0" in envs and "rocm6_4_4-hblt0" in envs: - rocwmma_pairs.append(("rocm6_4_4-rocwmma-hblt0", "rocm6_4_4-hblt0", "ROCm 6.4.4 (hipBLASLt OFF)")) - - rocwmma_rows = rocwmma_effect(runs, rocwmma_pairs, TESTS) - lines.append(md_row(["Context", "Test", "Compared Envs", "Pairs", "Median Δ%"])) - lines.append(md_row(["---", "---", "---", "---:", "---:"])) - for label, test, env_on, env_off, n, delta in rocwmma_rows: - lines.append(md_row([label, test, f"{ENV_LABEL.get(env_on, env_on)} vs {ENV_LABEL.get(env_off, env_off)}", str(n), f"{delta}%"])) - lines.append("") - - # hipBLASLt effect — for both ROCm 7 and 6.4.4 families - lines.append("### Impact of hipBLASLt") - hip_pairs = [] - if "rocm7_rc" in envs and "rocm7_rc-hblt0" in envs: - hip_pairs.append(("rocm7_rc", "rocm7_rc-hblt0", "ROCm 7 RC (no ROCWMMA)")) - if "rocm7_rc-rocwmma" in envs and "rocm7_rc-rocwmma-hblt0" in envs: - hip_pairs.append(("rocm7_rc-rocwmma", "rocm7_rc-rocwmma-hblt0", "ROCm 7 RC + ROCWMMA")) - if "rocm6_4_4" in envs and "rocm6_4_4-hblt0" in envs: - hip_pairs.append(("rocm6_4_4", "rocm6_4_4-hblt0", "ROCm 6.4.4 (no ROCWMMA)")) - if "rocm6_4_4-rocwmma" in envs and "rocm6_4_4-rocwmma-hblt0" in envs: - hip_pairs.append(("rocm6_4_4-rocwmma", "rocm6_4_4-rocwmma-hblt0", "ROCm 6.4.4 + ROCWMMA")) - - hip_rows = hipblaslt_effect(runs, hip_pairs, TESTS) - lines.append(md_row(["Context", "Test", "Compared Envs", "Pairs", "Median Δ%"])) - lines.append(md_row(["---", "---", "---", "---:", "---:"])) - for label, test, env_on, env_off, n, delta in hip_rows: - lines.append(md_row([label, test, f"{ENV_LABEL.get(env_on, env_on)} vs {ENV_LABEL.get(env_off, env_off)}", str(n), f"{delta}%"])) - lines.append("") - - # AMDVLK vs RADV - lines.append("### Vulkan: 
AMDVLK vs RADV") - lines.append("Head-to-head wins with selected Flash Attention filter:") - lines.append(md_row(["Test", "AMDVLK wins", "RADV wins", "Ties", "Total"])) - lines.append(md_row(["---", "---:", "---:", "---:", "---:"])) - for test, wa, wr, t, total in amdvlk_vs_radv(runs, fa_filter): - lines.append(md_row([test, str(wa), str(wr), str(t), str(total)])) - lines.append("") - lines.append("---") - lines.append("") - lines.append("## Recommendations") - pp_wins, _ = winners(pp_place, "first") - tg_wins, _ = winners(tg_place, "first") - lines.append(f"- **Fastest prompt processing:** {human_list(pp_wins)} (most 1st-place finishes with selected Flash Attention filter).") - lines.append(f"- **Fastest token generation:** {human_list(tg_wins)} (most 1st-place finishes with selected Flash Attention filter).") - # Balanced: highest (2*first + second) across PP+TG - def score(c: Dict[str, int]) -> int: - return c.get("first", 0) * 2 + c.get("second", 0) - best_bal = -1 - balanced: List[str] = [] - for env in envs: - s = score(pp_place.get(env, {})) + score(tg_place.get(env, {})) - if s > best_bal: - best_bal = s - balanced = [env] - elif s == best_bal: - balanced.append(env) - lines.append(f"- **Balanced choice:** {human_list(balanced)} (consistently near the top across PP/TG).") - lines.append("") - lines.append("---") - lines.append("") - lines.append("## Winner calculation") - lines.append("A backend is counted as a winner if its mean throughput is within the best backend’s pooled ± error margin for that model/test type. 
This treats results within measurement noise as ties instead of false losses.") - return "\n".join(lines) - -def main(): - ap = argparse.ArgumentParser() - ap.add_argument("--file", type=Path, default=Path("../docs/results.json"), - help="Path to results.json (default: ../docs/results.json)") - ap.add_argument("--out-readme", type=Path, default=Path("./README_benchmarks_section.md"), - help="Path to write README section Markdown (default: ./README_benchmarks_section.md)") - ap.add_argument("--out-bench", type=Path, default=Path("./benchmarks_generated.md"), - help="Path to write detailed benchmarks Markdown (default: ./benchmarks_generated.md)") - ap.add_argument("--fa", choices=["on", "off", "any"], default="on", - help="Flash Attention filter (default: on)") - ap.add_argument("--include-all-envs", action="store_true", - help="Include envs even if not present in results.json") - ap.add_argument("--only-env", action="append", - help="Restrict analysis to specific env keys (repeatable)") - args = ap.parse_args() - - data = load_results(args.file) - runs: List[Dict] = data["runs"] - fa_filter = fa_to_filter(args.fa) - envs = envs_present(runs, args.only_env, args.include_all_envs) - - pp_place, _ = margin_aware_placements(runs, envs, "pp512", fa_filter) - tg_place, _ = margin_aware_placements(runs, envs, "tg128", fa_filter) - - readme_md = build_readme_section(envs, pp_place, tg_place, fa_filter) - args.out_readme.write_text(readme_md) - - bench_md = build_benchmarks_doc(runs, envs, pp_place, tg_place, fa_filter) - args.out_bench.write_text(bench_md) - - print(f"Wrote:\n - {args.out_readme}\n - {args.out_bench}") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/benchmark/generate_results.json.py b/benchmark/generate_results_json.py similarity index 99% rename from benchmark/generate_results.json.py rename to benchmark/generate_results_json.py index 42d5d62..8e5e3d3 100644 --- a/benchmark/generate_results.json.py +++ 
b/benchmark/generate_results_json.py @@ -44,7 +44,8 @@ LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE) ENV_CANON = { "rocm7_1_1": "rocm7.1.1", - "rocm7_alpha": "rocm-7alpha", + "rocm7_alpha": "rocm7-nightlies", + "rocm-7alpha": "rocm7-nightlies", } def clean_model_name(raw): diff --git a/benchmark/parse_benchmark_results.py b/benchmark/parse_benchmark_results.py deleted file mode 100644 index ed9f60b..0000000 --- a/benchmark/parse_benchmark_results.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python3 -import re, glob, os - -# This script parses llama-bench logs in 'results/' to produce -# Markdown tables for pp512 (prompt processing) and tg128 (text generation). - -# Regex patterns to extract tokens/sec rows -PP_RE = re.compile(r"\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|\s*pp512\s*\|\s*([\d.]+)\s*±\s*([\d.]+)") -TG_RE = re.compile(r"\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|[^|]*\|\s*tg128\s*\|\s*([\d.]+)\s*±\s*([\d.]+)") - -# Patterns to classify errors -LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed", re.IGNORECASE) -HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE) -GENERIC_ERR = re.compile(r"error:|exit \d+", re.IGNORECASE) - -# Env ordering -ENV_ORDER = ["vulkan_radv","vulkan_amdvlk","rocm6_4_2","rocm7_beta","rocm7_rc"] - -data = {} - -# Utility to clean model names -def clean_name(raw): - return re.sub(r"-000\d+-of-000\d+", "", raw) - -# Scan logs -glob_pattern = os.path.join("results", "*.log") -for path in sorted(glob.glob(glob_pattern)): - # Fix: use rsplit, not rssplit - base = os.path.basename(path).rsplit('.log',1)[0] - if '__' not in base: - continue - model_raw, env = base.split('__',1) - model = clean_name(model_raw) - - text = open(path, errors='ignore').read() - # Determine error type - if LOAD_ERR.search(text): - err_type = 'load' - elif HANG_ERR.search(text): - err_type = 'hang' - elif GENERIC_ERR.search(text) and not (PP_RE.search(text) and TG_RE.search(text)): - err_type = 'runtime' - else: - err_type = 
None - - # Extract performance if no load error - pp_match = PP_RE.search(text) if err_type is None else None - tg_match = TG_RE.search(text) if err_type is None else None - - for key, match in [('pp512', pp_match), ('tg128', tg_match)]: - cell = { - 'mean': match.group(1) if match else None, - 'std': match.group(2) if match else None, - 'error': err_type is not None, - 'etype': err_type - } - data.setdefault(model, {}).setdefault(key, {})[env] = cell - -# Select winner -def pick_winner(env_data): - scores = {e: float(d['mean']) for e,d in env_data.items() if not d['error'] and d['mean']} - if not scores: - return '—' - best = max(scores, key=scores.get) - others = [v for k,v in scores.items() if k!=best] - tag = f"🏆 **{best}**" - if others: - gain = (scores[best]/max(others)-1)*100 - tag += f" (+{gain:.0f}%)" - return tag - -# Render table with distinct error messages -def render_table(test_label, display_name): - print(f"### {display_name} — tokens/second\n") - header = ['Model'] + [e.replace('_',' ').title() for e in ENV_ORDER] + ['Winner'] - print("| " + " | ".join(header) + " |") - print("|" + "|".join(['---']*len(header)) + "|") - - for model in sorted(data, key=lambda s: s.lower()): - row = [f"**{model}**"] - env_data = data[model].get(test_label, {}) - for env in ENV_ORDER: - d = env_data.get(env) - if not d: - cell = '—' - elif d['error']: - et = d['etype'] - if et=='load': - cell = '⚠️ Load Error' - elif et=='hang': - cell = '⚠️ GPU Hang' - else: - cell = '⚠️ Runtime Error' - else: - cell = f"{float(d['mean']):.2f} ± {float(d['std']):.2f}" - row.append(cell) - row.append(pick_winner(env_data)) - print("| " + " | ".join(row) + " |") - print() - -# Output tables -render_table('pp512','Prompt Processing (pp512)') -render_table('tg128','Text Generation (tg128)') - -# Summary of failures by type -fail_lines = [] -for model in sorted(data, key=lambda s: s.lower()): - for test_label, envs in data[model].items(): - for env,d in envs.items(): - if d['error']: - et 
= d['etype'] or 'unknown' - desc = { - 'load':'failed to load', - 'hang':'GPU hang', - 'runtime':'runtime error', - }.get(et, 'error') - fail_lines.append(f"- **{model}** [{test_label}] on *{env}*: {desc}") -if fail_lines: - print("## Failed Runs\n") - print("\n".join(fail_lines)) \ No newline at end of file diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__hblt0__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log similarity index 100% rename from 
benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4-rocwmma__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__hblt0__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm6_4_4__rpc.log diff --git 
a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__hblt0__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1-rocwmma__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log 
b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__hblt0__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_1__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log rename to 
benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__hblt0__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma-improved__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log rename to 
benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__hblt0__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha-rocwmma__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__longctx32768__rpc.log diff --git 
a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__hblt0__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_alpha__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log similarity index 100% rename from 
benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__hblt0__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc-rocwmma__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__hblt0__rpc.log diff --git 
a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__longctx32768__rpc.log diff --git a/benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log b/benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log similarity index 100% rename from benchmark/results-rpc/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log rename to benchmark/results-rpc/20-12-2025/GLM-4.6-UD-Q4_K_XL-00001-of-00005__rocm7_rc__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log 
b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4-rocwmma__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log similarity index 100% rename from 
benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm6_4_4__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log rename to 
benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1-rocwmma__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log 
b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_1__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log 
b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma-improved__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log 
b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha-rocwmma__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log similarity index 100% rename from 
benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_alpha__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log rename to 
benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc-rocwmma__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__hblt0__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__longctx32768__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__rocm7_rc__rpc.log diff --git a/benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log 
b/benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log similarity index 100% rename from benchmark/results-rpc/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log rename to benchmark/results-rpc/20-12-2025/MiniMax-M2-UD-Q6_K_XL-00001-of-00004__vulkan_amdvlk__rpc.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..b9717bd --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.74 ± 2.03 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.74 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..a4a0e10 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm 
devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.13 ± 0.85 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.73 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..f8bb9fc --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 333.45 ± 1.70 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.33 ± 0.00 | + +build: 
2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..1052ad9 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 336.20 ± 2.04 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.77 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..c2a507e --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.36 ± 0.16 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.68 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..63f3a20 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm 
devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.91 ± 1.10 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.68 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..555b577 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.90 ± 1.42 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | + +build: 
2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..7a33710 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 329.23 ± 1.32 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git 
a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log similarity 
index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..504b7db --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | 
bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 228.89 ± 0.52 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.48 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..3653d58 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 243.57 ± 0.43 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.54 ± 0.00 | + +build: 2aa45ef9e (7423) 
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..f984551 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 275.04 ± 0.75 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.57 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..8422388 --- /dev/null +++ 
b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 272.75 ± 1.25 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.56 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..45ffea3 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | 
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 277.38 ± 0.34 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.52 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..c29adb3 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 277.33 ± 0.75 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.62 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..ab6d7f5 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 254.32 ± 0.84 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.51 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..bb9476b --- /dev/null +++ 
b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 253.04 ± 1.12 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.50 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log new file mode 100644 index 0000000..ded8528 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | 
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 257.70 ± 0.50 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.59 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..499a6d6 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 259.40 ± 0.46 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.61 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log 
b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..da28589 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 279.25 ± 0.28 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.61 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..8a162f3 --- /dev/null +++ b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 244.36 ± 0.45 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.73 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log 
b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log 
b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..de26d53 --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.84 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..bb0fc76 --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 144.36 ± 0.18 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..03f0f5b --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.01 ± 0.05 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..0ff9545 --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ 
-0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.28 ± 0.12 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..6224321 --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 
0 | pp512 | 146.01 ± 0.05 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..f6db3a2 --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 143.94 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..cc7a19c --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.07 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..9b4a794 --- 
/dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.12 ± 0.04 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..3c6c07c --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | 
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.73 ± 0.26 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..022269b --- /dev/null +++ b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 88.29 ± 0.76 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log 
b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..dcb50e9 --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 792.57 ± 2.08 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.47 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..8a899fd --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 794.96 ± 3.40 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.47 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log new file mode 100644 index 0000000..176af12 --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 802.78 ± 0.92 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log similarity index 100% 
rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..8016de9 --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 810.15 ± 2.26 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..2eb1c65 --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log @@ 
-0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 781.49 ± 1.79 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..916ac50 --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 795.45 ± 1.95 | +| mistral3 14B BF16 | 25.16 GiB | 
13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log new file mode 100644 index 0000000..23a623e --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 797.09 ± 3.94 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..036342e --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 800.44 ± 2.67 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..8803b27 --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 228.96 ± 0.25 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.20 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log new file mode 100644 index 0000000..7712766 --- /dev/null +++ b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 198.30 ± 1.06 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.57 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log 
b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log similarity index 100% rename from 
benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..5e2df6e --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 
8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1111.52 ± 3.84 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.23 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..f7bf3a0 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1075.82 ± 2.72 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.19 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..5ce08c0 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1111.90 ± 4.65 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.13 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log 
b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..e818cc0 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1074.40 ± 7.61 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.17 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..248c243 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1110.04 ± 2.67 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.43 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..c6ab5d1 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1073.92 ± 6.88 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.52 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log new file mode 100644 index 0000000..31d6990 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1119.24 ± 8.14 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.47 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log new file mode 100644 
index 0000000..6178218 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1086.57 ± 5.04 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.44 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log 
similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..dbd7200 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1224.54 ± 7.71 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 46.56 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..08568b4 --- /dev/null +++ b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 940.69 ± 5.60 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 45.38 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from 
benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..3d604c2 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 198.21 ± 1.42 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..934a790 --- /dev/null +++ 
b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.49 ± 1.92 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..bb2c5e9 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | 
ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 205.39 ± 1.95 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.83 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..287edc5 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 201.21 ± 1.57 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.97 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..90e4554 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.07 ± 0.88 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..3d9aad0 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 195.14 ± 1.06 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.08 ± 0.05 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log new file mode 100644 index 0000000..aee2b6f --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.48 ± 2.40 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.91 ± 0.14 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..59896b1 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,24 @@ +ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f75f10bf5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f75f10bf96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f75f10bfaef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f75f3ec75f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d55ff5) [0x7f75f3ed1ff5] +/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7f75f3ecc83f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f75f10da483] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f75f45c87e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f75f45ca2b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f75f45cf6ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f75f45d04fe] +/usr/local/bin/llama-bench() [0x40ad9b] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7f75f0a555b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f75f0a55668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! 
[rocm7.1.1] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from 
benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log 
similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..9cfba84 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 145.16 ± 0.17 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.77 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..93bee5e --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 131.53 ± 1.13 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.08 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log diff 
--git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log similarity index 100% 
rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..911023a --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 567.78 ± 2.40 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.92 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..2b2c080 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 575.99 ± 6.42 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..b35dbc0 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.13 ± 3.25 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.13 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..f3428c4 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x3cc03d10) reason :GPU Hang +✖ ! [rocm6_4_4] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..b8f1d36 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 460.49 ± 1.91 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | 
tg128 | 27.12 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..ebf1c94 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 467.86 ± 1.23 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log 
diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..7edcf87 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 468.47 ± 2.10 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.08 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..d7db6d1 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | 
mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 470.04 ± 3.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.02 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log rename to 
benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename 
from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..350bb76 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 194.36 ± 0.12 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.96 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..3fa928a --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: 
Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 167.29 ± 0.18 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.36 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git 
a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..1081a96 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1045.84 ± 8.87 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.04 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..40a0181 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1046.62 ± 8.31 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..96b85d0 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1065.87 ± 15.74 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.71 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..bc9b8e5 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1056.16 ± 8.88 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.68 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..248c0ca --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 961.79 ± 10.60 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.69 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..a6ced1b --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 964.88 ± 9.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log new file mode 100644 index 0000000..8acd7e7 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 992.39 ± 4.30 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.48 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..42d8f1f --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 984.99 ± 7.73 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.39 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from 
benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log diff --git 
a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..66e05a7 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1129.76 ± 4.79 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 62.27 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..2b85a69 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 905.18 ± 4.26 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.46 ± 0.05 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..c00de91 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1201.14 ± 12.83 | +| qwen3moe 30B.A3B Q4_K - Medium | 
17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.92 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..fd3bded --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1208.02 ± 13.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.87 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log new file mode 100644 index 0000000..ebb7e8e --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.68 ± 19.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.13 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..cffdfb9 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
+ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1231.06 ± 2.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.08 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..50347bc --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1172.29 ± 9.77 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.19 ± 0.02 | + +build: 
2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..a50a1bd --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1169.43 ± 4.95 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.31 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log new file mode 100644 index 0000000..0a23174 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1183.05 ± 9.42 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.17 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..0ecb4c7 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
+ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1195.38 ± 5.88 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.06 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log rename to 
benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..5d8e040 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 825.86 ± 2.68 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.94 ± 0.04 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log new file mode 100644 index 0000000..94bec33 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 864.66 ± 2.72 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 83.07 ± 0.04 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log rename to 
benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log similarity index 100% rename from 
benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log new file mode 
100644 index 0000000..0b57963 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 573.57 ± 2.61 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.89 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..5dcd687 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | 
params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 595.88 ± 2.98 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.34 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..31e9e1e --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 576.31 ± 0.99 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.64 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..7269e2e --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 590.68 ± 0.83 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.38 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..b4f68a7 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 571.05 ± 4.21 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.45 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 
0000000..6a40914 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 594.40 ± 3.02 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.73 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..5299717 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl 
| fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 568.38 ± 2.63 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.50 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..6183815 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 587.50 ± 6.59 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.81 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log rename to 
benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log rename to 
benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..c79c18c --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 406.08 ± 1.14 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 33.67 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..45b0ac9 --- /dev/null +++ b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 326.83 ± 0.94 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 30.18 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename 
from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..a2843f4 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 897.73 ± 0.63 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 
1 | 0 | tg128 | 14.14 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..5bb70aa --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 904.28 ± 1.51 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log 
b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..235334a --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 913.75 ± 0.60 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.21 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..184977a --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 
GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 919.42 ± 0.44 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..bc50843 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 886.59 ± 0.64 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..328096e --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 890.97 ± 0.75 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log new file mode 100644 index 0000000..05f259d --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | 
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 910.34 ± 0.61 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..9cbc2a8 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 917.22 ± 0.99 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.28 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log rename to 
benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..1e29149 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 
12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 690.02 ± 1.72 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.56 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..649e0ba --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 549.78 ± 1.79 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.95 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log diff --git 
a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log similarity 
index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..e164e47 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm 
devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.30 ± 0.54 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..fef7449 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 525.67 ± 0.68 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..08ada47 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.62 ± 0.27 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log 
b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..59b9670 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 530.96 ± 0.63 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..cd81a2b --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | 
---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 463.74 ± 0.73 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..3ec22c1 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 522.71 ± 0.55 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 
100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..4d7ac4f --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.55 ± 0.52 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..d9990e7 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ 
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 526.17 ± 0.74 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git 
a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..7e00b6d --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 474.02 ± 0.22 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..7efd9cc --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +ggml_vulkan: Device memory allocation of size 
2819260416 failed. +ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory +main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' +✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..6df7e0c --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.99 ± 1.50 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.93 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log similarity index 100% rename from 
benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..73eee2d --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2907.52 ± 4.15 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.61 ± 0.04 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..069c775 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2902.86 ± 2.84 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.68 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log new file mode 100644 index 0000000..8d4790f --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2868.25 ± 16.39 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.93 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..3d1a62f --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.90 ± 17.97 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.07 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..bd61453 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2812.03 ± 15.70 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.66 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log rename 
to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..19e21d1 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2820.50 ± 10.20 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.66 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log new file mode 100644 index 0000000..ae8df7d --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 
+| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2853.13 ± 21.11 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.93 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..88706b8 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2846.23 ± 16.40 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.96 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log rename to 
benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..b62a85a --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1798.72 ± 4.50 
| +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 90.73 ± 0.09 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log new file mode 100644 index 0000000..a154a01 --- /dev/null +++ b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1633.15 ± 4.31 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.91 ± 0.19 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log 
b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log rename 
to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..7a833ac --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | 
fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 657.63 ± 7.64 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.65 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..9ed737f --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 649.55 ± 10.69 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.68 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..303a0c0 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 654.41 ± 2.17 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.37 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..aac1650 
--- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 658.64 ± 9.76 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.93 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..dfe030e --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 
59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 637.48 ± 24.73 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.66 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..a164176 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 653.60 ± 10.65 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.79 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log new file mode 100644 index 0000000..79ce413 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 639.37 ± 5.37 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.13 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..0a274b3 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 646.49 ± 8.17 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.16 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git 
a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log rename to 
benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..a1dd94b --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 792.77 ± 0.78 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.34 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..9ffcf99 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 543.66 ± 0.88 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.37 ± 0.04 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log rename to 
benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..5486142 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1752.24 ± 12.09 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.82 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 
100644 index 0000000..86bf607 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1717.36 ± 12.37 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.94 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log new file mode 100644 index 0000000..62ba657 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.01 ± 23.22 | +| gpt-oss 20B 
MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.48 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..e64cdc4 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1726.91 ± 4.81 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.44 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..7ff138e --- 
/dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.28 ± 7.42 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.12 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..29b33c0 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1695.75 ± 25.43 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 
B | ROCm | 99 | 1 | 0 | tg128 | 73.15 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log new file mode 100644 index 0000000..1530264 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1670.49 ± 30.36 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.45 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..00e6a9e --- /dev/null +++ 
b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1661.92 ± 6.16 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.41 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log similarity 
index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log diff --git 
a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..eff8190 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1965.23 ± 21.66 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 75.24 ± 0.04 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log new file mode 100644 index 0000000..297b848 --- /dev/null +++ b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1395.08 ± 16.05 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 79.60 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..8bea5c1 --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: 
found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1525.39 ± 0.85 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.05 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..adc47d5 --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1524.22 ± 2.19 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.02 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1.log new file mode 100644 index 0000000..8074b6d --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1539.28 ± 0.84 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.02 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..47a5836 --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + 
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1538.89 ± 3.35 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.07 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..e534712 --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1492.67 ± 1.40 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.89 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 100% rename from 
benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..d3c244e --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1495.58 ± 2.18 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.97 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1.log new file mode 100644 index 0000000..0d4f6c6 --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave 
Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1526.32 ± 2.10 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.96 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..5226879 --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1530.07 ± 0.42 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.01 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log rename to 
benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..8493016 --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1494.56 ± 4.36 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 56.03 ± 0.06 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1.log new file mode 100644 index 0000000..13624db --- /dev/null +++ b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1135.49 ± 4.16 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.73 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log rename to benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..01ffab8 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 
0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 80.08 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.70 ± 0.14 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..a63074a --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 6.90 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.72 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..14318e4 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.81 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.71 ± 0.13 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a63074a --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 6.90 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.72 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..665e557 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave 
Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 81.03 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.07 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..19a7c47 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.37 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..5b9f63c --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | 
test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 79.43 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.80 ± 0.05 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..623b913 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.42 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..56b6c4b --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 73.15 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.81 ± 0.14 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..e2939c5 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 7.28 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.61 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..b8a0d9d --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | 
test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 73.91 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.75 ± 0.21 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..5ec3925 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 7.24 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.61 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..a56cbfc --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | 
t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 73.64 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.74 ± 0.22 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..d33cd4c --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,18 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:96: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f1a3468b5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a3468b96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a3468baef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4e882) [0x7f1a37496882] +/usr/local/lib64/libggml-hip.so.0(+0x2d53c4e) [0x7f1a3749bc4e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1a346a2e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f1a37b75630] +/usr/local/bin/llama-bench() [0x40ae7c] +/usr/local/bin/llama-bench() [0x408bd1] +/lib64/libc.so.6(+0x35b5) [0x7f1a340215b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a34021668] +/usr/local/bin/llama-bench() [0x409cf5] +✖ ! 
[rocm-7alpha] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..d353fd9 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 72.94 ± 2.79 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.16 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e87a569 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.14 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB 
| 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..3eb2ca3 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 79.77 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.86 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..e006c01 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 9.04 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | 
tg32 @ d32768 | 0.68 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..5726aaf --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.60 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.83 ± 0.03 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..2662197 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 9.06 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 
| tg32 @ d32768 | 0.68 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..8b12325 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 77.29 ± 5.81 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.83 ± 0.04 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..d908ae5 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.54 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.00 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..b7571ea --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 76.84 ± 4.54 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.84 ± 0.03 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8d34cbb --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.30 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.00 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..3690a51 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 41.19 ± 7.76 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 1.87 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..fd57886 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.37 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB 
| 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.26 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..67ff2d8 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.53 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.96 ± 0.05 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..4b28cb9 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 
99 | 1 | 0 | pp2048 @ d32768 | 12.50 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.27 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index b9717bd..62546fa 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.74 ± 2.03 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.74 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 275.09 ± 0.35 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.72 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..98503f5 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | 
n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.65 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.70 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index a4a0e10..2e6dbb7 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.13 ± 0.85 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.73 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 303.19 ± 1.30 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.71 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 
0000000..888e430 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.90 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.70 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index f8bb9fc..ba953ff 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 333.45 ± 1.70 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.33 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 279.35 ± 0.90 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.56 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) 
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..9436551 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 37.23 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.31 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 1052ad9..82410db 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 336.20 ± 2.04 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.77 ± 0.00 | +| glm4moe 106B.A12B 
Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 307.88 ± 1.76 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.76 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..148c3f1 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.53 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.32 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..d124aa9 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 
188.28 ± 0.38 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.62 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..f228ebe --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,18 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.50 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:96: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fecd3ac85a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fecd3ac896b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fecd3ac8aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca3912) [0x7fecd6828912] +/usr/local/lib64/libggml-hip.so.0(+0x2ca8cde) [0x7fecd682dcde] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fecd3adfe5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fecd6f07630] +/usr/local/bin/llama-bench() [0x408ca6] +/lib64/libc.so.6(+0x35b5) [0x7fecd345e5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fecd345e668] +/usr/local/bin/llama-bench() [0x409cf5] +✖ ! 
[rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..0740976 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 234.18 ± 1.90 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d395d0e --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.74 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 
1 | 0 | tg32 @ d32768 | 1.38 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..af2dbbf --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 190.14 ± 0.23 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.71 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..c3d2826 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.33 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..d6ec8a8 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 235.84 ± 0.85 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.71 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4f4003f --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.02 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.14 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log index c2a507e..579f66a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.36 ± 0.16 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.68 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 273.31 ± 0.52 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.70 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..a898e0f --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 
106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.48 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.58 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log index 63f3a20..83e7914 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.91 ± 1.10 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 299.53 ± 0.59 | | glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.68 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..63c2f73 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | 
backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.59 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.58 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log index 555b577..f9ea3e5 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.90 ± 1.42 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 279.68 ± 1.30 | | glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..91c56ad --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: 
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 37.93 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.99 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log index 7a33710..083d2b2 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 329.23 ± 1.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 305.29 ± 1.90 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log 
b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ea1b0e2 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.08 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.00 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 504b7db..b1f3f04 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 228.89 ± 0.52 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.48 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 320.89 ± 0.75 | 
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 2.37 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..6adb97a --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 23.20 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.80 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 3653d58..2f9b902 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| 
glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 243.57 ± 0.43 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.54 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 281.21 ± 0.80 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 25.02 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..88db148 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 34.18 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.41 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index f984551..765eb4b 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: 
no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 275.04 ± 0.75 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.57 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 150.46 ± 0.44 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.55 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..e76112f --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.78 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.66 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index 8422388..fcd921d 100644 --- 
a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 272.75 ± 1.25 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.56 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 193.25 ± 0.21 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.55 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..dab13f2 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.69 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.66 ± 0.00 | 
+ +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 45ffea3..b038542 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 277.38 ± 0.34 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.52 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 151.37 ± 0.24 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.55 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..aaba1d0 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 
2048 | 1 | 0 | pp2048 @ d32768 | 31.21 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.25 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index c29adb3..2352a80 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 277.33 ± 0.75 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.62 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 195.36 ± 1.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.61 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cf289f9 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.34 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.25 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..53dc835 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 88.49 ± 0.23 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.65 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..021cc0b --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.47 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.34 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..5596703 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 152.24 ± 0.60 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..949204a --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K 
| 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.35 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.34 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..71b4a38 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 88.58 ± 0.11 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.68 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..9601a3b --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 30.36 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 
0 | tg32 @ d32768 | 7.24 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..c43eada --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 152.92 ± 0.24 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cd24ef1 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 35.26 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.23 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log index ab6d7f5..2adccde 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 254.32 ± 0.84 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 145.30 ± 0.23 | | glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.51 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..411b110 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.44 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 
94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.54 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log index bb9476b..595ae34 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 253.04 ± 1.12 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 185.77 ± 0.97 | | glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.50 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8b9e55a --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: 
| -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.15 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.55 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log index ded8528..a73c174 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 257.70 ± 0.50 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.59 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 146.64 ± 0.35 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.60 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..603c9c5 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | 
n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.21 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.19 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log index 499a6d6..6c10da2 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 259.40 ± 0.46 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 188.13 ± 0.15 | | glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.61 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ec666d0 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: 
Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.05 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.20 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index da28589..b242880 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 279.25 ± 0.28 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.61 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 290.49 ± 0.30 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.74 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 
index 0000000..078fc28 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.76 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log index 8a162f3..ec49e8c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 244.36 ± 0.45 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.73 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 261.76 ± 0.99 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.93 ± 0.01 | -build: 2aa45ef9e 
(7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..ae3dc57 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.30 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.69 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index de26d53..c2291f8 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.84 ± 0.07 | +| llama 70B 
Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.48 ± 0.05 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..9953ee2 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.84 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.08 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index bb0fc76..c76ae37 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: 
| ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 144.36 ± 0.18 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.63 ± 0.10 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9eaed87 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.77 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 03f0f5b..ce0f9af 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave 
Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.01 ± 0.05 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 148.26 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..7b5211d --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.54 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 0ff9545..44dfe3c 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ 
-1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.28 ± 0.12 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.21 ± 0.14 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..035ac3b --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.82 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..bf2412f --- /dev/null +++ 
b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 67.04 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..6a07f56 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.46 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.80 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..0028609 --- /dev/null +++ 
b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 66.63 ± 0.03 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ec2902c --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.47 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.80 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..c7be012 --- /dev/null +++ 
b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 67.05 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..e74c380 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.90 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..05a9c21 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 
@@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 66.64 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4a65754 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.86 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log index 6224321..1719fa5 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.01 ± 0.05 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.76 ± 0.06 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..77e5068 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.13 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.06 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log index f6db3a2..0f59b59 100644 --- 
a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 143.94 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.07 ± 0.15 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e4e01b6 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.14 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.06 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log index cc7a19c..426d6b7 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.07 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 148.44 ± 0.08 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..086c92f --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.66 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm 
| 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log index 9b4a794..6898f44 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.12 ± 0.04 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.61 ± 0.04 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a60c480 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 
70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.99 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 3c6c07c..00c2046 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.73 ± 0.26 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.89 ± 0.24 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.81 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a1e97ab --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | 
bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 18.12 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.16 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index 022269b..1f40e8f 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 88.29 ± 0.76 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 87.66 ± 0.55 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 
0000000..4832afb --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.96 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.39 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log index dcb50e9..02d1090 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 792.57 ± 2.08 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 792.51 ± 3.25 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.47 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..3c18947 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 54.31 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.48 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log index 8a899fd..25abb2c 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 794.96 ± 3.40 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 794.81 ± 3.31 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 
1 | 0 | tg128 | 8.47 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..58d6ce3 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 53.85 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.49 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log index 176af12..c87eda5 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 802.78 ± 0.92 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 801.73 ± 2.77 | | 
mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..d252ff5 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.31 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log index 8016de9..0a3cf91 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 810.15 ± 2.26 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 
| 1 | 0 | tg128 | 8.49 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 805.52 ± 3.18 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1f87ce0 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.32 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..1fd1388 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 789.37 ± 1.53 | +| mistral3 14B BF16 | 25.16 
GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..f4eb292 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 58.06 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..5b49226 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 787.74 ± 4.26 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..288c978 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 57.56 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..b03e29d --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 798.60 ± 3.84 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log 
b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..9a0fea9 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.77 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..09d46a4 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 799.84 ± 4.89 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6ff28f3 --- /dev/null +++ 
b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 159.82 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log index 2eb1c65..a2a39be 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 781.49 ± 1.79 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 779.79 ± 2.46 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx32768.log 
b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..6bf5a79 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 86.41 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.56 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log index 916ac50..b0b79dd 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 795.45 ± 1.95 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 793.84 ± 5.06 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c9d928f --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 85.44 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.55 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log index 23a623e..d804f02 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 797.09 ± 3.94 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 792.78 ± 1.08 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | -build: 
2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..90224a5 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 156.32 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log index 036342e..612fcc2 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 800.44 ± 2.67 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 803.71 ± 3.13 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 
| 0 | tg128 | 8.50 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3c53158 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.31 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log index 8803b27..e5b1efe 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 228.96 ± 0.25 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.20 ± 0.00 | 
+| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 187.83 ± 22.96 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.19 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..d2ca7d7 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.52 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.69 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log index 7712766..784b23b 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 198.30 ± 1.06 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.57 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 195.84 ± 0.06 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.56 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..5e0335b --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 75.42 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.23 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log index 5e2df6e..55588ef 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 
(0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1111.52 ± 3.84 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.23 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1047.96 ± 3.88 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.16 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..d1d0f6b --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 812.61 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.43 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index f7bf3a0..7cf3ee3 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ 
b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1075.82 ± 2.72 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.19 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1028.24 ± 6.22 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.14 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..0e862a7 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 811.19 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.40 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log index 5ce08c0..4aa26ac 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1111.90 ± 4.65 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.13 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1065.39 ± 1.75 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.12 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..504249f --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ 
d32768 | 823.17 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.90 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index e818cc0..50abf7d 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1074.40 ± 7.61 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.17 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1034.18 ± 3.12 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.08 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..fab837f --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 896.75 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.88 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..60b0e11 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 565.30 ± 3.35 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.68 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..82cb701 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 632.28 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.85 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..c5502bf --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 557.83 ± 3.26 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.65 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a477cfc --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 
31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 633.59 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.87 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..12cfa4e --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 567.35 ± 4.92 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.67 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..ec34bc3 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 660.41 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 
31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.42 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..2433767 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 560.67 ± 3.15 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9e1315b --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 663.35 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.44 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log index 248c243..72a07ab 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1110.04 ± 2.67 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.43 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1054.77 ± 4.94 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.44 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..eebc71a --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 
GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 671.89 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.37 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log index c6ab5d1..1d99572 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1073.92 ± 6.88 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.52 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1029.81 ± 2.15 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.46 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..18edd08 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, 
Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 674.11 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.47 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log index 31d6990..f038039 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1119.24 ± 8.14 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1051.12 ± 10.25 | | nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.47 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..39204d6 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 
ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 704.97 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.02 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log index 6178218..da08d9b 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1086.57 ± 5.04 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.44 ± 0.03 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1028.01 ± 11.24 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.37 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log 
b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3b1d0a7 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 743.16 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index dbd7200..64ecbc8 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1224.54 ± 7.71 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 46.56 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1253.52 ± 10.26 | +| nemotron_h_moe 31B.A3.5B 
Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 47.03 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..bae8a21 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 408.37 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log index 08568b4..aa1388b 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan 
| 99 | 1 | 0 | pp512 | 940.69 ± 5.60 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 45.38 ± 0.03 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1016.39 ± 35.31 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 46.53 ± 0.03 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..4426e2b --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 403.09 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 40.91 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 3d604c2..8dce1e8 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 198.21 ± 1.42 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.60 ± 2.04 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.10 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..0ef8d8e --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.76 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.08 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log 
b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index 934a790..186db12 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.49 ± 1.92 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 200.59 ± 1.45 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.13 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..b37fb0f --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | 
-------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.64 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.07 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log index bb2c5e9..2d23b75 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 205.39 ± 1.95 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.83 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 206.60 ± 0.55 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.93 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..a8041b1 --- /dev/null +++ 
b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.83 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.71 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 287edc5..3e46d51 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 201.21 ± 1.57 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.97 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.06 ± 14.56 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 
| tg128 | 15.02 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8658106 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.67 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.70 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..b01b440 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 201.43 ± 2.43 | +| qwen3moe 235B.A22B Q3_K - Medium | 
96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.10 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..95e68d3 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,19 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.54 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:96: ROCm error +:0:rocdevice.cpp :3618: 63153010732 us: Callback: Queue 0x7f3225300000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. 
code: 0x29 +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f3234ef25a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3234ef296b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3234ef2aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca3912) [0x7f3237c52912] +/usr/local/lib64/libggml-hip.so.0(+0x2ca8cde) [0x7f3237c57cde] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3234f09e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3238331630] +/usr/local/bin/llama-bench() [0x408ca6] +/lib64/libc.so.6(+0x35b5) [0x7f32348885b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3234888668] +/usr/local/bin/llama-bench() [0x409cf5] +✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..e31184b --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 201.86 ± 2.71 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log 
b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..afed40a --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.34 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.03 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..d0be143 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.07 ± 3.84 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.09 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..6ef3440 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.42 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.00 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..7607a8d --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 188.66 ± 20.66 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.61 ± 1.01 | + +build: 9c142e3a2 
(7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8a32625 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.43 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.01 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log index 90e4554..d5f5df3 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 
235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.07 ± 0.88 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 193.57 ± 10.23 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.83 ± 0.53 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..7ecc21d --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.05 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.90 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log index 3d9aad0..95ab277 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 195.14 ± 1.06 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.08 ± 0.05 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.80 ± 0.99 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.14 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..51c1f95 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.25 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.90 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log index aee2b6f..771dfa7 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.48 ± 2.40 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.91 ± 0.14 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 187.93 ± 19.38 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 12.70 ± 1.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..d353746 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Hip error: 'unspecified launch failure'(719) at 
/longer_pathname_so_that_rpms_can_support_packaging_the_debug_info_for_all_os_profiles/src/rocm-libraries/projects/hipblaslt/library/src/amd_detail/hipblaslt.cpp:148 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +✖ ! [rocm7.1.1] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log index 59896b1..3e315d4 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -1,24 +1,7 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +ggml_cuda_init: failed to initialize ROCm: no ROCm-capable device is detected | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f75f10bf5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f75f10bf96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f75f10bfaef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f75f3ec75f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d55ff5) [0x7f75f3ed1ff5] -/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7f75f3ecc83f] 
-/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f75f10da483] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f75f45c87e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f75f45ca2b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f75f45cf6ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f75f45d04fe] -/usr/local/bin/llama-bench() [0x40ad9b] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7f75f0a555b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f75f0a55668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7.1.1] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 failed (exit 0) +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 17.92 ± 2.98 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 7.99 ± 0.19 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..7ec3d7e --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 57.28 ± 0.00 | +| 
qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.55 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 9cfba84..74cc3d7 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 145.16 ± 0.17 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.77 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 168.14 ± 0.52 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 2.08 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..e62c799 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S 
Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 17.62 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.39 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log index 93bee5e..382b3ea 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 131.53 ± 1.13 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.08 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 146.89 ± 0.98 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.09 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..23324eb --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.66 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.52 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 911023a..82f3de7 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 567.78 ± 2.40 | -| qwen3moe 30B.A3B BF16 | 56.89 
GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.92 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 565.84 ± 2.23 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.86 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..d39f419 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 118.25 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.61 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 2b2c080..12a6821 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: 
| ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 575.99 ± 6.42 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 572.78 ± 6.78 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.81 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d0f4200 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 123.38 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.43 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index b35dbc0..96acea1 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.13 ± 3.25 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.13 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.12 ± 2.09 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..c06be6a --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 254.34 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.13 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index f3428c4..7809d51 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,6 +1,8 @@ 
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3cc03d10) reason :GPU Hang -✖ ! [rocm6_4_4] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 failed (exit 0) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.67 ± 2.54 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..b898478 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 244.89 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.18 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..6af7459 --- /dev/null +++ 
b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 488.98 ± 3.11 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..717da60 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 136.27 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.47 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..25320db --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ 
+ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 490.86 ± 3.57 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cc98b6f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.40 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.48 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..050cd71 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | 
backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 492.51 ± 1.28 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..80b86d3 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 203.91 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.28 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..acf2f5f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 494.46 ± 2.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.13 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..736f285 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 173.11 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.24 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log index b8f1d36..9d5b902 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: 
| -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 460.49 ± 1.91 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.12 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 466.91 ± 3.25 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..1bb542a --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 129.72 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 13.50 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log index ebf1c94..9e54070 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 
32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 467.86 ± 1.23 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 467.48 ± 2.87 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.09 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3c96eaa --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 126.75 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 13.51 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log index 7edcf87..2b9fd5f 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 468.47 ± 2.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.08 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 469.57 ± 3.31 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.97 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..bc82b1f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 173.18 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.22 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log index d7db6d1..e4e32a8 100644 --- 
a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 470.04 ± 3.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.02 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 471.12 ± 8.43 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.02 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..699cf1d --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 172.54 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.16 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 350bb76..5c4f9fe 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 194.36 ± 0.12 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.96 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 424.44 ± 1.61 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 10.62 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..3d61772 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 
30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 65.51 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index 3fa928a..33f57b0 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 167.29 ± 0.18 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.36 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 364.62 ± 2.62 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.49 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..9b6a21f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model 
| size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 93.65 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.14 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log index 1081a96..8054c25 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1045.84 ± 8.87 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.04 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1062.68 ± 4.46 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.77 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..c57b797 --- /dev/null +++ 
b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.28 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.99 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index 40a0181..83d5ea8 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1046.62 ± 8.31 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1056.43 ± 9.46 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.04 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..7990d4b --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 128.77 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.98 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log index 96b85d0..6b3826e 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1065.87 ± 15.74 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.71 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | 
ROCm | 99 | 1 | 0 | pp512 | 1073.10 ± 11.76 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..c8a6856 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.02 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log index bc9b8e5..3e19bb9 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm 
| 99 | 1 | 0 | pp512 | 1056.16 ± 8.88 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.68 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1075.09 ± 15.15 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.72 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d3cb0bc --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.43 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.02 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..e2beb13 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 912.93 ± 3.31 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.61 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..87f3d6b --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 142.02 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.77 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..26f6ef8 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | 
ROCm | 99 | 1 | 0 | pp512 | 918.79 ± 3.83 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.60 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e78f3ec --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 141.82 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..efb6752 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 904.96 ± 12.42 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | 
tg128 | 58.50 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..1446b4a --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.93 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.07 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..2b1dc75 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 906.79 ± 8.48 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.55 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6e97883 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.87 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log index 248c0ca..2a098a4 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 961.79 ± 10.60 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.69 ± 0.02 | +| qwen3moe 30B.A3B 
Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 982.10 ± 4.60 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.77 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..cedf966 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 138.54 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.30 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log index a6ced1b..895e2f4 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 964.88 ± 9.02 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 984.86 ± 9.16 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.90 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..717b9c2 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 138.98 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.33 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log index 8acd7e7..70d5d5b 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 992.39 ± 4.30 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.48 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 999.37 ± 14.29 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.24 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..e11c921 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.76 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.57 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log index 42d8f1f..f3ee322 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log +++ 
b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 984.99 ± 7.73 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.39 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1004.04 ± 12.55 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.32 ± 0.04 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..fa388a8 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 168.58 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.68 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log 
b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log index 66e05a7..01df500 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1129.76 ± 4.79 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 62.27 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1180.84 ± 8.60 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.24 ± 0.04 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..00d42b3 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 
1 | 0 | pp2048 @ d32768 | 71.45 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.82 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log index 2b85a69..7e7199a 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 905.18 ± 4.26 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.46 ± 0.05 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1046.73 ± 6.25 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 68.71 ± 0.14 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..edb34ce --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | 
params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 109.86 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 30.94 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log index c00de91..117e5b9 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1201.14 ± 12.83 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.92 ± 0.03 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1230.43 ± 11.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.83 ± 0.03 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..8bf99af --- /dev/null +++ 
b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 129.18 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log index fd3bded..9a0c06d 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1208.02 ± 13.07 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.87 ± 0.03 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.92 ± 10.08 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.92 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 
(7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..17ac854 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 128.45 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log index ebb7e8e..a7ccbd8 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.68 ± 19.07 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.13 ± 0.02 | +| qwen3moe 
30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1240.19 ± 1.93 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.06 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..4178bdc --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 286.57 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.33 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log index cffdfb9..096ba0e 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| 
qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1231.06 ± 2.02 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.08 ± 0.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1246.06 ± 12.57 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.95 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ae4c172 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 211.86 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.37 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..029696b --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1219.57 ± 14.84 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.70 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..8dd2c6f --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 146.44 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.97 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..72d9ba5 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- 
| --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1220.24 ± 11.95 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.58 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..03fc6e4 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 140.58 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.96 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1f7c7dd --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - 
Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.75 ± 5.62 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.54 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..ac7f41b --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.98 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.13 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..243d803 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1228.38 ± 14.75 | +| qwen3moe 30B.A3B Q4_K - Medium | 
17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.53 ± 0.03 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..097c8cf --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.67 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.01 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log index 50347bc..c4c1485 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1172.29 ± 9.77 | -| 
qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.19 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1199.99 ± 6.19 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.44 ± 0.03 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..221f492 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 140.63 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.09 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log index a50a1bd..ca9ecdb 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | 
ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1169.43 ± 4.95 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.31 ± 0.03 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1197.52 ± 8.13 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.22 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8ad658a --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 140.70 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.10 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log index 0a23174..4051986 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ 
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1183.05 ± 9.42 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.17 ± 0.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1224.80 ± 12.64 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.07 ± 0.04 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..23ad554 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 171.30 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.02 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log index 
0ecb4c7..ede00e6 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1195.38 ± 5.88 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.06 ± 0.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1228.44 ± 13.05 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.99 ± 0.03 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e3dd511 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 172.85 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 
@ d32768 | 33.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log index 5d8e040..df1dc18 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 825.86 ± 2.68 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.94 ± 0.04 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1072.21 ± 149.58 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 1.52 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..8a11d0b --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ 
| ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.87 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.45 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log index 94bec33..2539f39 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 864.66 ± 2.72 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 83.07 ± 0.04 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1075.31 ± 42.44 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 86.69 ± 0.11 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..6079ff7 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan 
devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.06 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 33.81 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 0b57963..18ab4a2 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 573.57 ± 2.61 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.89 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 589.90 ± 6.32 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.16 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..b19fe6c --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 373.61 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.27 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 5dcd687..e524579 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 595.88 ± 
2.98 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.34 ± 0.01 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 605.45 ± 4.55 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.11 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8f19370 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 380.23 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.22 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 31e9e1e..1e7106e 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), 
VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 576.31 ± 0.99 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.64 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 587.41 ± 3.59 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.12 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..f0ae3e2 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 421.06 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.55 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 7269e2e..41593f2 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ 
b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 590.68 ± 0.83 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.38 ± 0.01 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 610.91 ± 4.82 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.22 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..27251b7 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 432.47 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.56 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..5b99ce5 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 353.61 ± 9.32 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.26 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..ae39d24 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 282.62 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.83 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..b11f674 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 367.06 ± 4.12 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.31 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..78853fc --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 290.72 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.86 ± 0.00 | + +build: 
9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..0b88bef --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 354.31 ± 5.52 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.40 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..2b3ed00 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 327.85 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.06 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..3ea4bf5 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 364.74 ± 5.05 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.38 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..f3c68f2 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 340.53 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log index b4f68a7..db0fffb 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 571.05 ± 4.21 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.45 ± 0.03 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 586.46 ± 4.62 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.73 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..233d69f --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | 
-------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 353.48 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.51 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log index 6a40914..2da1661 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 594.40 ± 3.02 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.73 ± 0.01 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 602.30 ± 1.01 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.74 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4b787ab --- /dev/null +++ 
b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 358.54 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.53 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log index 5299717..e3d43ae 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 568.38 ± 2.63 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.50 ± 0.02 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 586.56 ± 7.78 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.80 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..2d23fe7 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 403.28 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.53 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log index 6183815..058efe5 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 587.50 ± 6.59 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | 
ROCm | 99 | 1 | 0 | tg128 | 26.81 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 609.81 ± 7.04 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.84 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ae25dd6 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 411.40 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.54 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index c79c18c..c0635f7 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend 
| ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 406.08 ± 1.14 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 33.67 ± 0.02 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 634.07 ± 4.20 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 33.94 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..9e31a3e --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 121.89 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 22.94 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index 45b0ac9..a95a432 100644 --- 
a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 326.83 ± 0.94 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 30.18 ± 0.02 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 538.47 ± 29.53 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 31.56 ± 0.11 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..c355291 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 211.76 ± 0.00 | +| 
qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 27.44 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log index a2843f4..d8becac 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 897.73 ± 0.63 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.14 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 915.98 ± 0.27 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..0c21b83 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 
13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 144.17 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.21 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index 5bb70aa..066ece9 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 904.28 ± 1.51 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 918.69 ± 0.81 | | gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e5091cf --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 142.71 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.24 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log index 235334a..ca8d844 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 913.75 ± 0.60 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.21 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 931.79 ± 1.30 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..f18870e --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | 
---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 247.33 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.61 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index 184977a..7f55d20 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 919.42 ± 0.44 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 936.67 ± 1.30 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3566962 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 259.06 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..0197948 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 422.39 ± 1.28 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.16 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..4372985 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | 
pp2048 @ d32768 | 267.47 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..7164345 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 423.79 ± 0.33 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..87784fa --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 263.80 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.09 ± 0.00 | + +build: 9c142e3a2 (7670) diff 
--git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..c78e668 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 428.84 ± 1.18 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.24 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..5337922 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 275.24 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..7e1d970 --- /dev/null +++ 
b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 431.06 ± 0.61 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.24 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..989d957 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 283.40 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log index bc50843..4a12b25 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 886.59 ± 0.64 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 897.91 ± 0.80 | | gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..4b015ca --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 155.37 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.22 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log index 328096e..03e0298 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: 
GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 890.97 ± 0.75 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 905.67 ± 0.84 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1ffe911 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 171.87 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.26 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log index 05f259d..8c947c9 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log @@ 
-1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 910.34 ± 0.61 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 926.89 ± 0.25 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.25 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..a688c0f --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 262.69 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.66 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log index 9cbc2a8..a9ef8e3 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log +++ 
b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 917.22 ± 0.99 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.28 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 934.30 ± 0.96 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3a0e24a --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 254.81 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.67 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 1e29149..1c332ff 100644 --- 
a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 690.02 ± 1.72 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 602.68 ± 80.42 | | gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.56 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..aa3f7c3 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.40 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.97 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log 
b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log index 649e0ba..8bb191e 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 549.78 ± 1.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.95 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 548.18 ± 1.59 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.94 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..7cec059 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 231.70 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 
10.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index e164e47..eff348c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.30 ± 0.54 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.64 ± 0.38 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..f3529f1 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 88.82 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 
27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.43 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index fef7449..d5910d6 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 525.67 ± 0.68 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 525.72 ± 0.44 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e778d5d --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 
27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 93.45 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.43 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log index 08ada47..24ad07c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.62 ± 0.27 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.15 ± 0.56 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..a72de4d --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 
27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.56 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 59b9670..302fb73 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 530.96 ± 0.63 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 531.41 ± 1.00 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d754100 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 214.27 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..570fa39 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,2 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..92cea90 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 92.58 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.12 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..cc84ef4 --- /dev/null +++ 
b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 523.92 ± 1.74 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1163ff1 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 92.90 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.12 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1c6842e --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: 
Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 470.21 ± 1.24 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..c14e74b --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 179.14 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..efdae4b --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | 
---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 526.32 ± 1.23 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a4539a4 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 193.22 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log index cd81a2b..e291950 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | 
---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 463.74 ± 0.73 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 464.70 ± 1.33 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..c8b651c --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.77 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.48 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log index 3ec22c1..94990ff 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 522.71 ± 0.55 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 523.70 ± 0.88 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..5cc9f6c --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 125.47 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.49 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log index 4d7ac4f..e9c1e83 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | 
backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.55 ± 0.52 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 467.63 ± 1.25 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.04 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..d71116c --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.27 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log index d9990e7..ffbc638 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 
ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 526.17 ± 0.74 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 527.37 ± 1.47 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d9ea532 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.00 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log index 7e00b6d..68f29ba 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -1,10 +1,2 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 474.02 ± 0.22 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 7efd9cc..ef8b29d 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2819260416 failed. -ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' -✖ ! 
[vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 failed (exit 0) +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 111.81 ± 20.34 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.85 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..e6b8559 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 73.77 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.40 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log index 6df7e0c..e1a2ff4 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | 
--: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.99 ± 1.50 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.93 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.40 ± 0.65 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.92 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..f1e6c00 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.67 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log index 73eee2d..4bef89e 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | 
model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2907.52 ± 4.15 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.61 ± 0.04 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2881.51 ± 4.15 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.27 ± 2.44 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..92e5c0e --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 849.40 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.01 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log index 069c775..ffcbaeb 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2902.86 ± 2.84 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.68 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2891.26 ± 1.42 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.55 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..43d80f5 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 852.79 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log index 8d4790f..c102746 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2868.25 ± 16.39 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.93 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2884.56 ± 5.24 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.80 ± 0.03 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..eeeb59c --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1446.85 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.42 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log index 3d1a62f..2a53d30 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ 
b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.90 ± 17.97 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.07 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.72 ± 3.55 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.97 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a78c350 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1258.46 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 
index 0000000..4e0ac20 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2794.32 ± 20.28 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..5408a7d --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1196.36 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.62 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..9c3ec2b --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2690.59 ± 197.24 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 68.41 ± 4.26 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..376a993 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1205.66 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.64 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..e5cb703 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2829.05 ± 14.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 82.17 ± 4.20 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..421c142 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1118.35 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 61.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..e8c43cb --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2843.10 ± 21.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.76 ± 0.02 | + +build: 9c142e3a2 
(7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..143a699 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1123.24 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 61.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log index bd61453..10dfe8b 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2812.03 ± 15.70 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.66 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2811.05 ± 10.85 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 
3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.49 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..bbe1dee --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1063.31 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.08 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log index 19e21d1..52eaf34 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2820.50 ± 10.20 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.66 ± 0.03 | +| gemma3 4B Q3_K - Small 
| 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2812.26 ± 11.64 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.74 ± 8.86 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8d6d07b --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1064.82 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.06 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log index ae8df7d..8431abc 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2853.13 ± 21.11 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | 
ROCm | 99 | 1 | 0 | tg128 | 81.93 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2657.05 ± 331.16 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 62.00 ± 3.43 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..9fb395a --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1124.76 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.78 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log index 88706b8..dbf2273 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2846.23 ± 
16.40 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.96 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2818.77 ± 65.80 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 65.67 ± 4.85 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cc2947e --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1136.22 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.85 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log index b62a85a..6dcd777 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B 
Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1798.72 ± 4.50 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 90.73 ± 0.09 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1514.96 ± 340.21 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 81.61 ± 2.29 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a3d57b9 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 188.74 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 66.83 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log index a154a01..ac9a8fa 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1633.15 ± 4.31 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.91 ± 0.19 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1235.50 ± 244.41 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 81.02 ± 2.09 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..56ae086 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 694.43 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.80 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 7a833ac..408fa56 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: 
no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 657.63 ± 7.64 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.65 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 687.06 ± 8.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.55 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..6d41aef --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 307.43 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.89 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index 9ed737f..aed7951 100644 --- 
a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 649.55 ± 10.69 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.68 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 687.03 ± 1.93 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.68 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..67d8d83 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 307.27 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.89 ± 0.00 | + +build: 
9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log index 303a0c0..3553f04 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 654.41 ± 2.17 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.37 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 683.09 ± 7.89 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.50 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..64eaca1 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 334.72 
± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.07 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log index aac1650..78c2f6b 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 658.64 ± 9.76 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.93 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 684.21 ± 8.30 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.99 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6b62e44 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | 
---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 333.73 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.14 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..dd7eee3 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 278.28 ± 5.27 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.90 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..60200b7 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B 
| ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 190.34 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.96 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..ae1f87b --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 285.78 ± 0.23 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.91 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..688c546 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 191.59 ± 0.00 | +| gpt-oss 120B MXFP4 MoE 
| 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.94 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1ef8133 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 283.71 ± 2.88 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.21 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..2e840f6 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 207.09 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.30 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..679ec4a --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 284.30 ± 2.38 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.16 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..00bc827 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 198.59 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.34 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log 
b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log index dfe030e..61d9850 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 637.48 ± 24.73 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.66 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 677.02 ± 4.35 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.80 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..f57e1b5 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 224.05 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 
116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.92 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log index a164176..b067646 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 653.60 ± 10.65 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.79 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 693.01 ± 6.24 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.72 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e426fc1 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | 
---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 224.69 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.89 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log index 79ce413..b917599 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 639.37 ± 5.37 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.13 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 675.95 ± 6.90 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.15 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..17ea910 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | 
size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 257.37 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.80 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log index 0a274b3..83c349e 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 646.49 ± 8.17 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.16 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 667.48 ± 9.09 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.15 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..f3f74b8 --- /dev/null +++ 
b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 255.32 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.87 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index a1dd94b..5b42185 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 792.77 ± 0.78 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.34 ± 0.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 862.37 ± 1.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 5.59 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..2ba5783 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 183.43 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.21 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index 9ffcf99..9676853 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 543.66 ± 0.88 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.37 ± 0.04 | +| gpt-oss 120B MXFP4 MoE 
| 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 585.93 ± 27.57 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 57.18 ± 0.07 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..de517e6 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 160.38 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 38.70 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log index 5486142..aaae77e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B 
| ROCm | 99 | 1 | 0 | pp512 | 1752.24 ± 12.09 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.82 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1796.98 ± 13.31 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.90 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..852e227 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 485.17 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.14 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log index 86bf607..0f59a65 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | 
---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1717.36 ± 12.37 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.94 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1788.91 ± 36.82 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.01 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..12b89e4 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 485.74 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.08 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log index 62ba657..872dcfb 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ 
| ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.01 ± 23.22 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.48 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1776.02 ± 15.96 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.43 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..409ef11 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 596.09 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.20 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log index e64cdc4..a92dd8c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s 
| | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1726.91 ± 4.81 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.44 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1777.68 ± 17.27 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.45 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3d98873 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 546.37 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.26 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..6cc3e41 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | 
---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 833.43 ± 3.27 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.20 ± 0.03 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..1557cf0 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 314.25 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.21 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..56acd7b --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 822.67 ± 8.71 | +| gpt-oss 20B MXFP4 
MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.17 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..30ff8b0 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 313.82 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.19 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..16a54d1 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 816.54 ± 1.21 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.67 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log 
b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..8667197 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 374.10 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.98 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..12ce75a --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 821.55 ± 13.20 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.73 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4c7fde9 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ 
+ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 355.59 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 52.02 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log index 7ff138e..466c31f 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.28 ± 7.42 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.12 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1779.00 ± 10.42 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.22 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..f2eebb9 --- /dev/null +++ 
b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.09 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.12 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log index 29b33c0..485665b 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1695.75 ± 25.43 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.15 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1775.23 ± 21.22 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.12 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log 
b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1b5c671 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.31 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.14 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log index 1530264..ecfd557 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1670.49 ± 30.36 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.45 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.76 ± 45.80 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.34 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..c21ca46 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 420.14 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 55.52 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log index 00e6a9e..4a79534 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1661.92 ± 6.16 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.41 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1732.05 ± 23.11 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.34 ± 0.00 | -build: 2aa45ef9e (7423) 
+build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d853d82 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 417.44 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 55.34 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index eff8190..ca81fc1 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1965.23 ± 21.66 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 75.24 ± 0.04 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1771.72 ± 240.97 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | 
Vulkan | 99 | 1 | 0 | tg128 | 7.95 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a764fdc --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 294.23 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.42 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index 297b848..65dad4f 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1395.08 ± 16.05 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 79.60 ± 0.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 
20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1429.10 ± 24.10 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 80.56 ± 0.18 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..b18d7b4 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 284.79 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 56.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log index 8bea5c1..002583a 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1525.39 ± 0.85 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | 
ROCm | 99 | 1 | 0 | tg128 | 51.05 ± 0.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1578.53 ± 1.91 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.01 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..930e31b --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.34 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log index adc47d5..dc678a1 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1524.22 ± 2.19 | -| llama 7B Q4_0 | 3.56 
GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.02 ± 0.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1583.99 ± 4.22 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.06 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..957c992 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.27 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log index 8074b6d..28fa54f 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1539.28 ± 0.84 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 
| 1 | 0 | tg128 | 51.02 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1595.85 ± 4.24 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.05 ± 0.02 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..eabd52b --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.69 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log index 47a5836..97ed387 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1538.89 ± 3.35 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.07 ± 0.02 | +| llama 
7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1599.42 ± 4.42 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.06 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8b942a9 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 187.77 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__fa1.log new file mode 100644 index 0000000..78fccc0 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.29 ± 0.53 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.59 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__fa1__longctx32768.log 
b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..eb8fe9b --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 43.39 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..af3f138 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 670.47 ± 0.30 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.61 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e77433c --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ 
+ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 42.56 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..0008484 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.91 ± 0.33 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.72 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..724fd52 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: 
| +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 147.45 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..55d9338 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.59 ± 0.34 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.64 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d7a864a --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.15 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log index e534712..06aedd2 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1492.67 ± 1.40 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.89 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1547.50 ± 0.77 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.90 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..f221dd4 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 84.83 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log index d3c244e..6d91a31 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1495.58 ± 2.18 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.97 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1543.82 ± 2.88 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.92 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..aac41ae --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 83.93 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.79 ± 0.00 | + 
+build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log index 0d4f6c6..acd5e4e 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1526.32 ± 2.10 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.96 ± 0.03 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1578.33 ± 5.18 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.91 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log new file mode 100644 index 0000000..2bd4e89 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.75 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log index 5226879..724b4d9 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log @@ -1,10 +1,8 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1530.07 ± 0.42 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.01 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1587.41 ± 1.76 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.89 ± 0.01 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..92c1781 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.58 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.78 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index 8493016..7655db3 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1494.56 ± 4.36 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 56.03 ± 0.06 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1261.90 ± 215.63 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 6.26 ± 0.00 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..5a169c2 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 104.76 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 4.00 ± 
0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index 13624db..5548853 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1135.49 ± 4.16 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.73 ± 0.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1126.16 ± 2.34 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.42 ± 0.08 | -build: 2aa45ef9e (7423) +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..697ab8d --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 162.11 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | 
tg32 @ d32768 | 8.74 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/run_benchmarks.sh b/benchmark/run_benchmarks.sh index 5716b18..f1dc04d 100755 --- a/benchmark/run_benchmarks.sh +++ b/benchmark/run_benchmarks.sh @@ -27,14 +27,8 @@ echo declare -A CMDS=( [rocm6_4_4]="toolbox run -c llama-rocm-6.4.4 -- /usr/local/bin/llama-bench" - [rocm6_4_4-rocwmma]="toolbox run -c llama-rocm-6.4.4-rocwmma -- /usr/local/bin/llama-bench" [rocm7.1.1]="toolbox run -c llama-rocm-7.1.1 -- /usr/local/bin/llama-bench" - [rocm7.1.1-rocwmma]="toolbox run -c llama-rocm-7.1.1-rocwmma -- /usr/local/bin/llama-bench" - [rocm-7alpha-rocwmma-improved]="toolbox run -c llama-rocm-7alpha-rocwmma-improved -- /usr/local/bin/llama-bench" - [rocm-7alpha]="toolbox run -c llama-rocm-7alpha -- /usr/local/bin/llama-bench" - [rocm-7alpha-rocwmma]="toolbox run -c llama-rocm-7alpha-rocwmma -- /usr/local/bin/llama-bench" - [rocm7_rc]="toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench" - [rocm7_rc-rocwmma]="toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench" + [rocm7-nightlies]="toolbox run -c llama-rocm7-nightlies -- /usr/local/bin/llama-bench" [vulkan_amdvlk]="toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench" [vulkan_radv]="toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench" ) @@ -91,9 +85,9 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do fi OUT="$RESULTDIR/${MODEL_NAME}__${ENV}${SUFFIX}${CTX_SUFFIX}.log" - CTX_REPS=3 + CTX_REPS=5 if [[ "$CTX" == longctx32768 ]]; then - CTX_REPS=1 + CTX_REPS=3 fi if [[ -s "$OUT" ]]; then diff --git a/benchmark/run_rpc_benchmarks.sh b/benchmark/run_rpc_benchmarks.sh index 14c1aed..c95c875 100755 --- a/benchmark/run_rpc_benchmarks.sh +++ b/benchmark/run_rpc_benchmarks.sh @@ -27,42 +27,24 @@ fi # Toolbox containers to exercise over RPC. 
declare -A TOOLBOX_IMAGES=( [rocm6_4_4]="llama-rocm-6.4.4" - [rocm6_4_4-rocwmma]="llama-rocm-6.4.4-rocwmma" [rocm7_1_1]="llama-rocm-7.1.1" - [rocm7_1_1-rocwmma]="llama-rocm-7.1.1-rocwmma" - [rocm7_rc]="llama-rocm-7rc" - [rocm7_rc-rocwmma]="llama-rocm-7rc-rocwmma" - [rocm7_alpha]="llama-rocm-7alpha" - [rocm7_alpha-rocwmma]="llama-rocm-7alpha-rocwmma" - [rocm7_alpha-rocwmma-improved]="llama-rocm-7alpha-rocwmma-improved" + [rocm7-nightlies]="llama-rocm7-nightlies" [vulkan_amdvlk]="llama-vulkan-amdvlk" [vulkan_radv]="llama-vulkan-radv" ) declare -A CLIENT_CMDS=( [rocm6_4_4]="toolbox run -c llama-rocm-6.4.4 -- /usr/local/bin/llama-bench" - [rocm6_4_4-rocwmma]="toolbox run -c llama-rocm-6.4.4-rocwmma -- /usr/local/bin/llama-bench" [rocm7_1_1]="toolbox run -c llama-rocm-7.1.1 -- /usr/local/bin/llama-bench" - [rocm7_1_1-rocwmma]="toolbox run -c llama-rocm-7.1.1-rocwmma -- /usr/local/bin/llama-bench" - [rocm7_rc]="toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench" - [rocm7_rc-rocwmma]="toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench" - [rocm7_alpha]="toolbox run -c llama-rocm-7alpha -- /usr/local/bin/llama-bench" - [rocm7_alpha-rocwmma]="toolbox run -c llama-rocm-7alpha-rocwmma -- /usr/local/bin/llama-bench" - [rocm7_alpha-rocwmma-improved]="toolbox run -c llama-rocm-7alpha-rocwmma-improved -- /usr/local/bin/llama-bench" + [rocm7-nightlies]="toolbox run -c llama-rocm7-nightlies -- /usr/local/bin/llama-bench" [vulkan_amdvlk]="toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench" [vulkan_radv]="toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench" ) ENVIRONMENTS=( rocm6_4_4 - rocm6_4_4-rocwmma rocm7_1_1 - rocm7_1_1-rocwmma - rocm7_rc - rocm7_rc-rocwmma - rocm7_alpha - rocm7_alpha-rocwmma - rocm7_alpha-rocwmma-improved + rocm7-nightlies vulkan_amdvlk vulkan_radv ) diff --git a/docs/index.html b/docs/index.html index 5bfab2b..f91718a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -27,9 +27,6 @@ - @@ -89,13 +86,19 @@ @@ -104,11 
+107,15 @@ @@ -116,21 +123,27 @@ -