diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml
index 7af5766..99ac9b3 100644
--- a/.github/workflows/build_and_publish.yml
+++ b/.github/workflows/build_and_publish.yml
@@ -11,7 +11,7 @@ on:
         default: all
 
 env:
-  DOCKERHUB_REPO: docker.io/kyuz0/amd-strix-halo-toolboxes
+  DOCKERHUB_REPO: gitea.wefers.page/julian/amd-strix-halo-toolboxes
   LOCAL_PREFIX: llama
 
 jobs:
@@ -63,11 +63,26 @@ jobs:
       - name: Check out repository
         uses: actions/checkout@v3
 
-      - name: Log in to Docker Hub
-        uses: docker/login-action@v2
+      - name: Cache podman storage for ${{ matrix.backend }}
+        uses: actions/cache@v5
         with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          key: podman-storage-${{ matrix.backend }}
+          restore-keys: |
+            podman-storage-${{ matrix.backend }}
+            podman-storage
+          path: ~/.local/share/containers/storage
+
+      - name: Log in to Docker Hub
+        env:
+          REGISTRY_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          REGISTRY_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
+        run: |
+          # Log in to the registry host that DOCKERHUB_REPO points at. Without an
+          # explicit registry argument podman falls back to its configured default
+          # registry, which is not where the images below are pushed.
+          # The token is piped via stdin so it never shows up on a command line.
+          printf '%s' "${REGISTRY_PASSWORD}" \
+            | podman login --username "${REGISTRY_USERNAME}" --password-stdin "${DOCKERHUB_REPO%%/*}"
 
       - name: Set build timestamp
         run: echo "BUILD_TS=$(date +%Y%m%dT%H%M%S)" >> $GITHUB_ENV
@@ -86,12 +101,48 @@ jobs:
           CHN="${DOCKERHUB_REPO}:${NAME}"
 
           echo "→ Building ${DF}"
-          docker build --no-cache -t "${LI}" -f "${DF}" .
+
+          # We use buildah to eventually make use of pushing with
+          # zstd:chunked compression, which is much more efficient
+          # than Docker's gzip format.
+          # --pull: ensure we use the latest version of the base image
+          # --squash: flatten the final result image into one single layer.
+          #           Avoids large image sizes due to intermediate files
+          #           that are irrelevant for the user
+          # --format oci: use the OCI image format, which allows for pushing with zstd:chunked
+          # --no-cache: recompute every step in the Dockerfile, even if the previous layer
+          #             has not been invalidated. Needed since we pull from the internet.
+          # --cache-(to|from): pull/push the intermediate cache layers resulting from
+          #                    --mount options in the Dockerfile (documented for reference; not passed to the command below)
+          # NOTE: we are mounting cache layers for dnf and pushing them. This cache
+          # layer is shared amongst all Dockerfiles, since they have the identical
+          # mount parameter. When building in parallel with buildah, those cache layers
+          # compete. In parallel, they all pull the latest fitting cache, then maybe
+          # add some packages relevant to their specific variant, then afterwards push
+          # the cache again. When multiple buildahs push the dnf cache, they could invalidate
+          # the just-pushed cache of another builder instance, so some packages might
+          # always be missing. SOLUTION: we give each container's dnf cache an individual
+          # id, thus one cache per variant.
+          buildah bud \
+            --pull \
+            --squash \
+            --format oci \
+            --no-cache \
+            -t "${LI}" \
+            -f "${DF}" \
+            .
+
+          echo "→ Running smoke test..."
+          podman run --rm "${LI}" llama version
+          podman run --rm "${LI}" llama-cli --help || { status=$?; echo "llama-cli exited with status $status"; [[ $status -eq 0 || $status -eq 1 || $status -eq 134 ]]; }
+          podman run --rm "${LI}" llama-server --help || { status=$?; echo "llama-server exited with status $status"; [[ $status -eq 0 || $status -eq 1 || $status -eq 134 ]]; }
+
+          # push with zstd:chunked compression, see https://github.com/containers/storage/blob/main/docs/containers-storage-zstd-chunked.md
 
           echo "→ Tag & push immutable → ${IMM}"
-          docker tag "${LI}" "${IMM}"
-          docker push "${IMM}"
+          buildah tag "${LI}" "${IMM}"
+          buildah push --compression-format zstd:chunked "${IMM}"
 
           echo "→ Tag & push channel → ${CHN}"
-          docker tag "${IMM}" "${CHN}"
-          docker push "${CHN}"
+          buildah tag "${IMM}" "${CHN}"
+          buildah push --compression-format zstd:chunked "${CHN}"
diff --git a/.gitignore b/.gitignore
index ed8ebf5..88d0b24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-__pycache__
\ No newline at end of file
+__pycache__
+research
\ No newline at end of file
diff
--git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index 07bbca3..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 72.93 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.95 ± 0.06 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index d3dcb44..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.04 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.01 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index dc35095..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.58 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 914ebcc..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.48 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.01 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index b4213c1..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.51 ± 0.07 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.72 ± 0.10 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 86f7ce7..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.57 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 2c8897a..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.24 ± 0.10 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.67 ± 0.11 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 77126a6..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.53 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.01 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 8e9d026..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 78.28 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.98 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index b8f691a..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.18 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index baf867e..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 80.59 ± 0.10 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 108f8a6..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.27 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.00 | - -build: 
e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 3836dd7..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 17.65 ± 0.01 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index b2e6658..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ 
d32768 | 4.94 ± 0.01 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 1.69 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index aeb25b4..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 54.76 ± 11.46 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 31ef62e..0000000 --- a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | 
--------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 7.15 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.27 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index 455d013..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 454.95 ± 1.90 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 22.26 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 71489ab..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 94.79 ± 0.56 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 
| 2048 | 1 | tg32 @ d32768 | 16.48 ± 0.09 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index e9729d2..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.21 ± 1.79 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index cd27b5f..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 93.83 ± 0.40 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.55 ± 0.01 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index d58dab9..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 407.15 ± 2.05 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 21.51 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index aa07720..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 101.09 ± 0.37 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.23 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log 
deleted file mode 100644 index f69d3f6..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 414.23 ± 2.09 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.11 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6b8851c..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 100.06 ± 0.38 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 15.97 ± 0.45 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 5c3d51b..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ 
/dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 489.62 ± 3.63 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.40 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 718fbd4..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.48 ± 1.13 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.50 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 45c08f7..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave 
Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.86 ± 2.29 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9bd643e..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.06 ± 0.08 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.51 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index f8630c5..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | 
backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 106.42 ± 0.08 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 10.87 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index a025866..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.09 ± 0.00 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.28 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index f2c87cb..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | 
ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 333.10 ± 6.48 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 9.51 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index ad53f7f..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 78.99 ± 0.25 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.13 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index 01e9227..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | 
-| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.34 ± 1.32 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.94 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index e85162c..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 90.22 ± 4.88 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.35 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index da85676..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.87 ± 1.21 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.09 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 40cfc38..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.13 ± 0.15 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.56 ± 1.34 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index a68e922..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 947.86 ± 2.03 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 33.77 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 
11651a6..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.85 ± 1.04 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.89 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index d44f270..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 952.84 ± 2.21 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.23 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3627139..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 
8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.01 ± 0.58 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.97 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index 35840f2..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 983.72 ± 3.21 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.20 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 7c3adf6..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: 
| -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 80.32 ± 1.28 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.31 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 3d7dec0..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 955.10 ± 4.53 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.16 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8bd5686..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 81.34 ± 1.80 | 
-| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.32 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 5754f88..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 368.78 ± 0.17 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.80 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 58b0878..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.35 ± 0.00 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | 
Vulkan | 99 | 1 | tg32 @ d32768 | 18.75 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 8c3838a..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 877.18 ± 8.15 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.07 ± 0.78 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index f135b33..0000000 --- a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 90.27 ± 0.42 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 23.07 ± 0.03 | - -build: e0c93af2a 
(7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index e023a01..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 48.83 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 94fd676..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 29.25 ± 0.17 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.45 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log 
b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 3a2ff58..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.38 ± 0.03 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0c8dbc3..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 29.17 ± 0.18 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log 
deleted file mode 100644 index 238a9e8..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 146.04 ± 0.21 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 14bad93..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 36.22 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.43 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index b6f9056..0000000 --- 
a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 146.83 ± 0.25 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0611423..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 36.40 ± 0.23 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index ac63a96..0000000 --- 
a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 155.06 ± 0.11 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 1986253..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.36 ± 0.61 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index b16ded7..0000000 --- 
a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 151.70 ± 0.21 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 5bd76eb..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.35 ± 0.67 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index a342daf..0000000 --- 
a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp512 | 21.74 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg128 | 2.81 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 23f10da..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 8.35 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.36 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log 
deleted file mode 100644 index 6907745..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp512 | 99.39 ± 0.58 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg128 | 2.76 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 36b4c25..0000000 --- a/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 11.79 ± 0.02 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.44 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log 
b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log deleted file mode 100644 index df4a892..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 800.17 ± 1.72 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index f184a50..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 169.18 ± 1.16 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 32590b1..0000000 --- 
a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.22 ± 2.21 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 505df24..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.11 ± 0.81 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log deleted file mode 100644 index 8118e71..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 764.18 ± 1.66 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index f3f55d3..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 166.22 ± 1.20 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 661b27c..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- 
| --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 766.68 ± 1.07 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6cd94b5..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 164.84 ± 1.99 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log deleted file mode 100644 index 1ef3f29..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 990.88 ± 3.15 | -| mistral3 14B BF16 | 
25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.50 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 5b40f9e..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 172.42 ± 3.61 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 545b269..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 799.71 ± 2.09 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 5974e95..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.19 ± 1.69 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log deleted file mode 100644 index 3c69e17..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 19.70 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 8.24 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index f5ad8c4..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 16.69 ± 0.01 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.41 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log deleted file mode 100644 index e04be5a..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 222.01 ± 0.94 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 7.59 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index d5938e7..0000000 --- a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 76.47 ± 0.38 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.39 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index 2325924..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 393.61 ± 2.94 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.58 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log 
b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 47210cf..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 562.85 ± 0.47 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.59 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 266236d..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 388.54 ± 2.76 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.61 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 
index 9c300e9..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 564.71 ± 0.81 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.60 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 9da9fbc..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1026.87 ± 6.06 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.90 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 6b04e21..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 
+0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1042.36 ± 2.24 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.08 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 6463c3e..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1027.41 ± 6.28 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.05 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 34cd5ba..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, 
Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1038.86 ± 3.17 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.04 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index 34472dc..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1070.15 ± 5.54 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.56 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 9a56129..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 908.79 ± 27.38 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.91 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index eefc5b7..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1038.67 ± 2.82 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.57 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 19b5211..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: 
| -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 821.93 ± 29.40 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.92 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 21e6010..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp512 | 676.59 ± 50.83 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg128 | 47.22 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index b7d95f1..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 342.52 ± 0.46 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg32 @ d32768 | 35.25 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 7ba7684..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp512 | 951.76 ± 41.03 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg128 | 46.68 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index c01a4ec..0000000 --- a/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa 
| test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 799.39 ± 0.69 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg32 @ d32768 | 41.15 ± 0.06 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log deleted file mode 100644 index 682b041..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 202.36 ± 3.50 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.80 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 9057c7d..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ 
| ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.36 ± 0.87 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.65 ± 0.30 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 366d2a3..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 200.10 ± 8.37 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.04 ± 0.06 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index cb8f1d5..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.53 ± 0.41 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.50 ± 0.75 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index a353c9d..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 205.05 ± 3.62 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 14.98 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index e89ce17..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 51.11 ± 0.62 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.63 ± 0.07 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 50fa18d..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 203.41 ± 3.52 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.00 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index e681c9d..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | 
n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 51.19 ± 0.64 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.58 ± 0.10 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index 803ea12..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 200.04 ± 4.11 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.19 ± 0.09 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 7bd5d26..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | 
params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.42 ± 0.37 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 10.94 ± 0.42 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 291f2c8..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 197.48 ± 10.80 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.20 ± 0.08 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index a79a619..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.60 ± 0.36 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 10.81 ± 0.61 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index 5264d6e..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp512 | 119.82 ± 3.30 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg128 | 17.75 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 8ecec15..0000000 --- 
a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 27.41 ± 0.01 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.42 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 67b83cc..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp512 | 133.28 ± 1.45 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg128 | 15.98 ± 0.25 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log 
b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 7945690..0000000 --- a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 30.79 ± 0.06 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.50 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index f8f2cd5..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 489.11 ± 2.88 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.18 ± 0.16 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log 
b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index c30a4c0..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 214.97 ± 1.13 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.57 ± 1.40 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 3f8e597..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 492.32 ± 2.55 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.23 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 23b0efd..0000000 --- 
a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 207.64 ± 0.55 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.84 ± 0.97 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 4bf79a0..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 576.03 ± 3.01 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 26.12 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index d1e5d2a..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 
8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 249.94 ± 1.13 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.18 ± 0.33 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 3d12444..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 569.42 ± 8.52 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.07 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index fedbfed..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 250.24 ± 0.88 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.77 ± 0.98 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index a6ca809..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 512.10 ± 4.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.27 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index c1c2630..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B 
BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 216.18 ± 0.74 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.71 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 7e6c7c6..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 493.72 ± 3.45 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.32 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index aec4b8d..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 209.02 ± 0.16 | -| qwen3moe 30B.A3B BF16 | 
56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.67 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 4f9d7a2..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 168.95 ± 7.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 10.62 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 69c594e..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 75.04 ± 0.02 | -| qwen3moe 30B.A3B BF16 | 
56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.68 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index e9a9b4f..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 351.97 ± 2.56 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 9.42 ± 0.21 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index e5c1a75..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 127.67 ± 0.45 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | 
Vulkan | 99 | 1 | tg32 @ d32768 | 8.31 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index 0dd1297..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 815.37 ± 5.82 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.54 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index cef8f0a..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.56 ± 4.38 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.30 ± 0.03 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index c248812..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 811.39 ± 6.56 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.57 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index d87028a..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 171.54 ± 4.45 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.29 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log 
b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 73135a6..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1078.99 ± 11.01 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 56.45 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 0fa6c28..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.17 ± 8.71 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 30.94 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index e4c7e21..0000000 --- 
a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1080.52 ± 10.73 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 57.49 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index bb64b06..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 218.42 ± 7.66 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 30.96 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index 75d0678..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 
+0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1056.78 ± 36.08 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 59.15 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index fef8ac0..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 175.40 ± 4.11 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.98 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index e795bb5..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: 
no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1039.16 ± 53.94 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 59.16 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index cacca70..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 174.67 ± 4.22 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.98 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 3e8cb37..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: 
KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 823.08 ± 48.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 66.14 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 7e07c93..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 112.99 ± 0.13 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 27.35 ± 0.07 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 9fd3aa4..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 
1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 1064.73 ± 70.49 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 68.93 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index f189e87..0000000 --- a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 152.30 ± 3.42 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 34.18 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log deleted file mode 100644 index 33abf49..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1209.23 ± 7.46 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.48 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 1e0bd42..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 177.01 ± 5.01 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.40 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 3c417e5..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| 
qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1207.91 ± 9.78 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.48 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index d871650..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 175.56 ± 3.86 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.37 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log deleted file mode 100644 index 22e9a0f..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1263.87 ± 7.23 | -| qwen3moe 30B.A3B Q4_K - 
Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 68.78 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 0afdf48..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.20 ± 8.55 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 33.48 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 5e79f7a..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1260.69 ± 6.89 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 68.94 ± 0.02 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index a96e13b..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.34 ± 7.55 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 33.52 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log deleted file mode 100644 index 3b0cc22..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1241.85 ± 15.18 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 72.57 ± 0.01 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 5027639..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 176.98 ± 4.28 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 35.43 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 70fc750..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1229.55 ± 20.23 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 72.45 ± 0.01 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0b7f117..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 176.47 ± 4.18 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 35.44 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log deleted file mode 100644 index f27f9db..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 846.24 ± 47.60 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 86.32 ± 0.04 | - -build: e0c93af2a (7938) 
diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index c8693bb..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 114.18 ± 0.07 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 30.07 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log deleted file mode 100644 index 4e7288a..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 1005.90 ± 6.10 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 79.55 
± 6.96 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 15abf33..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 153.83 ± 3.76 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 37.44 ± 0.05 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index f5ae063..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 193.67 ± 2.12 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.98 ± 0.01 | - -build: e0c93af2a (7938) diff 
--git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index d5132fd..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.45 ± 0.19 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.87 ± 3.54 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index d64554b..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 263.91 ± 3.82 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.97 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 206af26..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 296.64 ± 0.50 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.44 ± 4.28 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index a915bc9..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 592.54 ± 4.38 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 27.45 ± 0.01 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 99b98d0..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 449.68 ± 1.06 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 25.10 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 3d76bbf..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 592.83 ± 4.39 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 27.75 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index c1c013e..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 448.82 ± 1.02 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 25.07 ± 0.35 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index b89bb7b..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 282.60 ± 2.04 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.89 ± 0.20 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 5d5c0db..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 238.71 ± 0.62 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.93 ± 4.11 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index b8878a5..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 590.03 ± 3.05 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.73 ± 0.52 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index be1cd00..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 413.78 ± 0.61 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.54 ± 3.30 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 35f7add..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp512 | 426.39 ± 3.26 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 
B | Vulkan | 99 | 1 | tg128 | 31.84 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 9ed3aa4..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 228.41 ± 1.50 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg32 @ d32768 | 22.47 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 19a2fa7..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 
| 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp512 | 509.22 ± 20.34 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg128 | 29.92 ± 0.05 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index faa295c..0000000 --- a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 370.94 ± 32.12 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg32 @ d32768 | 26.00 ± 0.20 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index a9d4fa7..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 323.33 ± 0.27 | -| gemma3 12B Q8_0 | 
13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index ee21601..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 232.79 ± 5.34 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 9ed3a0f..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 324.44 ± 0.31 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log 
b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 45ba7e6..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 229.19 ± 6.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 86e7334..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 936.69 ± 1.33 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.23 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 9364a43..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ 
-1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 258.34 ± 1.81 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.63 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 2eda1f4..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 935.37 ± 1.09 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.20 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 5f3c340..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 261.44 ± 5.27 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.62 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index fb87a4a..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 943.63 ± 1.62 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 14934cf..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 396.59 ± 
26.74 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index ba60108..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 942.52 ± 1.34 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 878aaa3..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 376.68 ± 9.34 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 55932ee..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp512 | 125.50 ± 0.06 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg128 | 14.45 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 41a43d8..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 111.11 ± 0.04 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg32 @ d32768 | 11.40 ± 0.01 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index edac157..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp512 | 687.05 ± 0.75 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg128 | 14.14 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 24d4dab..0000000 --- a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 376.92 ± 18.46 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg32 @ d32768 | 11.72 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log 
b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index 1ff6f83..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 463.92 ± 1.19 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index b36f842..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 191.32 ± 3.30 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.68 ± 0.11 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index c643303..0000000 --- 
a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 528.00 ± 0.44 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index cc34693..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 201.67 ± 1.78 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.74 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 00bcee5..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 508.08 ± 0.85 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 12a07a9..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.44 ± 2.25 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 97f96ee..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | 
--------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 508.48 ± 0.88 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 93404ec..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 220.03 ± 0.98 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index d8498c8..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 549.57 ± 2.42 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 
99 | 1 | tg128 | 4.02 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 08d310a..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 215.98 ± 0.94 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.73 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 59af47c..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 529.01 ± 0.98 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 359ca8d..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 204.92 ± 2.92 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.73 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index db92211..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp512 | 9.32 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg128 | 3.87 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 7a34429..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 9.20 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.60 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 858bf96..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp512 | 123.07 ± 0.27 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg128 | 3.92 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 767256d..0000000 --- a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 82.96 ± 0.72 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.66 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log deleted file mode 100644 index c50aeb9..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2870.77 ± 12.89 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.57 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log deleted 
file mode 100644 index 861d2dd..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1639.03 ± 15.14 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.51 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 7ec3e36..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2807.93 ± 16.33 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.66 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index aba12ed..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1262.15 ± 24.34 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.54 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log deleted file mode 100644 index 6b110c7..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2891.85 ± 2.60 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 82.18 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 71df791..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - 
Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1600.62 ± 30.98 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 60.21 ± 0.11 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 98f27b4..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2893.75 ± 3.92 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 82.15 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index d1578cd..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1419.18 ± 40.21 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 60.24 ± 0.11 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log deleted file mode 100644 index fe1a281..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2805.65 ± 13.25 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 85.35 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index ef479c4..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1215.66 ± 10.33 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 62.02 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index da4aac2..0000000 --- 
a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2800.57 ± 47.75 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 85.47 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index e240976..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1214.20 ± 13.26 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 62.03 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log deleted file mode 100644 index 6ebe735..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | 
uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp512 | 657.19 ± 0.41 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg128 | 86.55 ± 0.10 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 404a158..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 493.70 ± 0.98 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg32 @ d32768 | 58.57 ± 0.11 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log deleted file mode 100644 index 0537089..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: 
KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp512 | 1977.82 ± 204.87 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg128 | 91.09 ± 3.96 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index e118216..0000000 --- a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 1149.92 ± 30.21 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg32 @ d32768 | 67.86 ± 0.22 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log deleted file mode 100644 index f098ce7..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | 
--------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.09 ± 1.36 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 51.77 ± 0.73 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 0d8b56a..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.33 ± 0.32 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.51 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index d6ccfa5..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.23 ± 1.39 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB 
| 116.83 B | ROCm | 99 | 1 | tg128 | 52.06 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9bfd24d..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 218.30 ± 0.10 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.08 ± 4.33 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index a05b582..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 681.58 ± 4.94 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 50.85 ± 0.00 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 5948f34..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 396.76 ± 35.92 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.13 ± 0.05 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index abd20b5..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 678.97 ± 4.29 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 51.88 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log 
b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index e3aae6c..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 400.64 ± 35.51 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 37.97 ± 3.34 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index 5365be2..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 649.28 ± 39.54 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.00 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 
c611dc9..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 259.94 ± 5.81 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.30 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 83fdee3..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 666.65 ± 12.50 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.05 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 420ec5d..0000000 --- 
a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 261.35 ± 6.47 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.36 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index 4e6b4ce..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp512 | 643.04 ± 39.69 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg128 | 54.00 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 394417c..0000000 --- 
a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 195.45 ± 2.65 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg32 @ d32768 | 37.02 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 40087c2..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp512 | 597.02 ± 9.82 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg128 | 57.38 ± 0.04 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index e8acd38..0000000 --- 
a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 278.37 ± 7.19 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg32 @ d32768 | 42.78 ± 0.09 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log deleted file mode 100644 index 78c56b0..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 547.85 ± 6.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.52 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index e4ebb3b..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: 
no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 402.32 ± 0.67 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.50 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index bb60d45..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 546.41 ± 6.71 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.52 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index f06a549..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE 
| 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 402.65 ± 1.50 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.58 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log deleted file mode 100644 index 77a4b05..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1779.88 ± 16.15 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.26 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 7323d53..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 583.30 ± 9.18 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.85 ± 0.02 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 537f920..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1785.44 ± 15.68 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.22 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9599307..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 600.15 ± 13.61 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.78 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log deleted file mode 100644 index adb683c..0000000 --- 
a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1742.62 ± 12.05 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.51 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 46f28e1..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 428.95 ± 5.63 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.82 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index c06bd1f..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | 
size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1730.96 ± 9.70 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.53 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index e0f62d9..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 425.86 ± 3.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.90 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log deleted file mode 100644 index fd7fc59..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | 
--------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp512 | 1300.97 ± 78.99 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg128 | 77.58 ± 0.03 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 92c9d52..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 337.80 ± 4.40 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg32 @ d32768 | 53.06 ± 0.10 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log deleted file mode 100644 index 5534eab..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 
| pp512 | 1397.71 ± 70.15 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg128 | 80.99 ± 0.06 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 9f1beb7..0000000 --- a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 416.91 ± 7.90 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg32 @ d32768 | 60.56 ± 0.77 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log deleted file mode 100644 index 4aeb30d..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.58 ± 0.25 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.55 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log 
b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index a671c82..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 144.34 ± 1.03 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 8caec46..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 548.97 ± 0.14 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.52 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 259fde9..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 143.70 ± 0.31 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log deleted file mode 100644 index d34295f..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1597.02 ± 1.89 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.01 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 0f41bbb..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 
193.62 ± 1.29 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.93 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index ed7cd83..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1598.36 ± 1.08 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.01 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index e9332b6..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 191.18 ± 2.26 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.94 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log deleted 
file mode 100644 index dd9ab75..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1590.62 ± 1.92 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.17 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 8634ecc..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 210.19 ± 4.57 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.56 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 08f165c..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | 
params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1590.40 ± 2.71 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.21 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 355d6a7..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 216.78 ± 3.22 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.56 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log deleted file mode 100644 index b7dc396..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 
GiB | 6.74 B | Vulkan | 99 | 1 | pp512 | 349.95 ± 0.30 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg128 | 56.00 ± 0.22 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 34caa8e..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 152.53 ± 0.11 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg32 @ d32768 | 9.29 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log deleted file mode 100644 index e89bb38..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp512 | 1355.55 ± 2.34 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg128 | 55.88 ± 0.13 | - -build: e0c93af2a 
(7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 2ca9650..0000000 --- a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 246.20 ± 1.24 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.76 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/system_info.json b/benchmark/results/04-02-2026/system_info.json deleted file mode 100644 index b7fac37..0000000 --- a/benchmark/results/04-02-2026/system_info.json +++ /dev/null @@ -1 +0,0 @@ -{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.18.5-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260110-1.fc43.noarch", "timestamp": "04 Feb 2026"} diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index 6fe8641..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | 
---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 66.52 ± 7.27 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.76 ± 0.08 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index f2148c0..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 648542a..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | 
--------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.47 ± 0.20 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.82 ± 0.07 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 2892f38..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.47 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 665e557..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 81.03 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.07 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 19a7c47..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.37 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 5b9f63c..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 79.43 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.80 ± 0.05 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 623b913..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.42 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index a56cbfc..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | 
---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 73.64 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.74 ± 0.22 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index d33cd4c..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,18 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:96: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f1a3468b5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a3468b96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a3468baef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4e882) [0x7f1a37496882] -/usr/local/lib64/libggml-hip.so.0(+0x2d53c4e) [0x7f1a3749bc4e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1a346a2e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f1a37b75630] -/usr/local/bin/llama-bench() [0x40ae7c] -/usr/local/bin/llama-bench() [0x408bd1] -/lib64/libc.so.6(+0x35b5) [0x7f1a340215b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a34021668] -/usr/local/bin/llama-bench() [0x409cf5] -✖ ! 
[rocm-7alpha] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index d353fd9..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 72.94 ± 2.79 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.16 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index e87a569..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 
| pp2048 @ d32768 | 17.14 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index 8b12325..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 77.29 ± 5.81 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.83 ± 0.04 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index d908ae5..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ 
d32768 | 17.54 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.00 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index b7571ea..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 76.84 ± 4.54 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.84 ± 0.03 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8d34cbb..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 
99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.30 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.00 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 3690a51..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 41.19 ± 7.76 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 1.87 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index fd57886..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.37 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.26 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 67ff2d8..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.53 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.96 ± 0.05 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 4b28cb9..0000000 --- a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | 
bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 12.50 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.27 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index f3637a5..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | pp512 | 133.48 ± 0.41 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | tg128 | 22.52 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index c5e6271..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | 
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 34.18 ± 0.22 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 8.12 ± 0.14 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index cb1d652..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | pp512 | 181.28 ± 1.15 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | tg128 | 22.65 ± 0.06 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 943b26b..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | 
-: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 35.28 ± 0.24 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.97 ± 0.43 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index ba953ff..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 279.35 ± 0.90 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.56 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 9436551..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 
GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 37.23 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.31 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 82410db..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 307.88 ± 1.76 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.76 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 148c3f1..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 
| pp2048 @ d32768 | 38.53 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.32 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index af2dbbf..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 190.14 ± 0.23 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.71 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index c3d2826..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.33 ± 0.00 | -| glm4moe 
106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index d6ec8a8..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 235.84 ± 0.85 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.71 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4f4003f..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.02 ± 0.00 | -| glm4moe 
106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.14 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index f9ea3e5..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 279.68 ± 1.30 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 91c56ad..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 37.93 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ 
d32768 | 7.99 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 083d2b2..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 305.29 ± 1.90 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index ea1b0e2..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.08 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.00 ± 0.00 | - -build: 
9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index b1f3f04..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 320.89 ± 0.75 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 2.37 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 6adb97a..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 23.20 ± 0.00 | -| glm4moe 
106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.80 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 2f9b902..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 281.21 ± 0.80 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 25.02 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 88db148..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 
68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 34.18 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.41 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__fa1.log deleted file mode 100644 index 6d916e4..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | pp512 | 56.12 ± 0.13 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | tg128 | 16.60 ± 0.04 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 4c9a183..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 22.87 ± 0.07 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | tg32 @ 
d32768 | 6.89 ± 0.67 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 79e4936..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | pp512 | 105.50 ± 0.54 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | tg128 | 16.65 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6020dcf..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 32.93 ± 0.13 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.24 ± 0.04 | - -build: a14b960bc (7816) diff --git 
a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index b038542..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 151.37 ± 0.24 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.55 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index aaba1d0..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 31.21 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.25 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log 
b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 2352a80..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 195.36 ± 1.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.61 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index cf289f9..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.34 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.25 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log 
b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index 71b4a38..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 88.58 ± 0.11 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.68 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 9601a3b..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 30.36 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.24 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log 
b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index c43eada..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 152.92 ± 0.24 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index cd24ef1..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 35.26 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.23 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log 
b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log deleted file mode 100644 index a73c174..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 146.64 ± 0.35 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.60 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 603c9c5..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.21 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.19 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log deleted file 
mode 100644 index 6c10da2..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 188.13 ± 0.15 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.61 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index ec666d0..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.05 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.20 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index b242880..0000000 --- 
a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 290.49 ± 0.30 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.74 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 078fc28..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.76 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log 
b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index ec49e8c..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 261.76 ± 0.99 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.93 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index ae3dc57..0000000 --- a/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.30 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.69 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index eb98cfd..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.50 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 6c2824c..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 27.87 ± 0.75 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.42 ± 0.06 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log 
b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 1533579..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.27 ± 0.02 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index c15e681..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 28.46 ± 0.36 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.44 ± 0.04 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log 
deleted file mode 100644 index ce0f9af..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 148.26 ± 0.07 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 7b5211d..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.54 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 44dfe3c..0000000 --- 
a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.21 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 035ac3b..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.82 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index c7be012..0000000 --- 
a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 67.05 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index e74c380..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.90 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 05a9c21..0000000 --- 
a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 66.64 ± 0.02 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4a65754..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.86 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index 426d6b7..0000000 --- 
a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 148.44 ± 0.08 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 086c92f..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.66 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 6898f44..0000000 --- 
a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.61 ± 0.04 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index a60c480..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.99 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 00c2046..0000000 --- 
a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.89 ± 0.24 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.81 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index a1e97ab..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 18.12 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.16 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log 
b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 1f40e8f..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 87.66 ± 0.55 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 4832afb..0000000 --- a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.96 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.39 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log deleted file mode 100644 index 921c345..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 797.13 ± 2.39 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 9dfec45..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 146.47 ± 5.52 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log deleted file mode 100644 
index 0832956..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.39 ± 2.22 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 91e85e1..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 152.56 ± 6.51 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log deleted file mode 100644 index c87eda5..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: 
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 801.73 ± 2.77 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index d252ff5..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.31 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 0a3cf91..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | 
t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 805.52 ± 3.18 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1f87ce0..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.32 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log deleted file mode 100644 index b03e29d..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 798.60 ± 3.84 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 9a0fea9..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.77 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 09d46a4..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 799.84 ± 
4.89 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6ff28f3..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 159.82 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 90224a5..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 156.32 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 
13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 612fcc2..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 803.71 ± 3.13 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3c53158..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.31 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log deleted file mode 100644 index e5b1efe..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 187.83 ± 22.96 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.19 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index d2ca7d7..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.52 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.69 ± 0.00 | - -build: 
9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log deleted file mode 100644 index 784b23b..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 195.84 ± 0.06 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.56 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 5e0335b..0000000 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 75.42 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.23 ± 0.00 | 
- -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index a25472a..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 393.57 ± 2.37 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.69 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 7ccfdf7..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 559.16 ± 0.99 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 39.74 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log 
b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index eaea7c6..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 389.10 ± 3.02 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.68 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index d0ae54b..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 558.52 ± 1.33 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 39.73 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 
4aa26ac..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1065.39 ± 1.75 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.12 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 504249f..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 823.17 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.90 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 50abf7d..0000000 --- 
a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1034.18 ± 3.12 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.08 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index fab837f..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 896.75 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.88 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index 12cfa4e..0000000 --- 
a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 567.35 ± 4.92 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.67 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index ec34bc3..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 660.41 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.42 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 2433767..0000000 --- 
a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 560.67 ± 3.15 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.63 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9e1315b..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 663.35 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.44 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log deleted file mode 100644 index f038039..0000000 --- 
a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1051.12 ± 10.25 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.47 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 39204d6..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 704.97 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.02 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index da08d9b..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log 
+++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1028.01 ± 11.24 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.37 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3b1d0a7..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 743.16 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.05 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 64ecbc8..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: 
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1253.52 ± 10.26 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 47.03 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index bae8a21..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 408.37 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.93 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index aa1388b..0000000 --- 
a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1016.39 ± 35.31 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 46.53 ± 0.03 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 4426e2b..0000000 --- a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 403.09 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 40.91 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log deleted file mode 100644 index 8ce6864..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 178.32 ± 26.83 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 12.97 ± 0.98 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 9777216..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.11 ± 0.36 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.90 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index ce3e316..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 167.63 ± 28.76 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 12.73 ± 0.74 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 929e46f..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 37.92 ± 0.33 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.78 ± 0.20 | - -build: a14b960bc (7816) diff --git 
a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 2d23b75..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 206.60 ± 0.55 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.93 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index a8041b1..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.83 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.71 ± 0.00 | - -build: 
9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 3e46d51..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.06 ± 14.56 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.02 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8658106..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.67 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 
B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.70 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index d0be143..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.07 ± 3.84 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.09 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 6ef3440..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.42 ± 
0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.00 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 7607a8d..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 188.66 ± 20.66 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.61 ± 1.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8a32625..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| 
qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.43 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.01 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log deleted file mode 100644 index 771dfa7..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 187.93 ± 19.38 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 12.70 ± 1.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index d353746..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Hip error: 'unspecified launch failure'(719) at 
/longer_pathname_so_that_rpms_can_support_packaging_the_debug_info_for_all_os_profiles/src/rocm-libraries/projects/hipblaslt/library/src/amd_detail/hipblaslt.cpp:148 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -✖ ! [rocm7.1.1] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 3e315d4..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,7 +0,0 @@ -ggml_cuda_init: failed to initialize ROCm: no ROCm-capable device is detected -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 17.92 ± 2.98 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 7.99 ± 0.19 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 7ec3d7e..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 57.28 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.55 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index 74cc3d7..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 168.14 ± 0.52 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 2.08 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index e62c799..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 17.62 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.39 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 382b3ea..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 146.89 ± 0.98 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.09 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 23324eb..0000000 --- a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.66 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.52 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index 3aa8493..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 484.09 ± 10.61 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.12 ± 0.16 | - -build: a14b960bc (7816) diff --git 
a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 4f16aaf..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 162.38 ± 4.20 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.71 ± 1.16 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 50fd7e6..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 492.54 ± 2.48 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.09 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log 
b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3bcfa1d..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 161.72 ± 4.90 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.36 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 96acea1..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.12 ± 2.09 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index c06be6a..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 254.34 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.13 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 7809d51..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.67 ± 2.54 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index b898478..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ 
/dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 244.89 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.18 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 050cd71..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 492.51 ± 1.28 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 80b86d3..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 
(0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 203.91 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.28 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index acf2f5f..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 494.46 ± 2.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.13 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 736f285..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | 
ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 173.11 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.24 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index 2b9fd5f..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 469.57 ± 3.31 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.97 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index bc82b1f..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 173.18 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.22 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index e4e32a8..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 471.12 ± 8.43 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.02 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 699cf1d..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 
| 1 | 0 | pp2048 @ d32768 | 172.54 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.16 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 5c4f9fe..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 424.44 ± 1.61 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 10.62 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 3d61772..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 
30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 65.51 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.05 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 33f57b0..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 364.62 ± 2.62 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.49 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 9b6a21f..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 93.65 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.14 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index a326a57..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 813.78 ± 5.52 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.57 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index a38fcee..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 154.84 ± 3.34 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 
B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.32 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index dba8fa9..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 789.10 ± 47.98 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.51 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 49dce2c..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 155.23 ± 3.28 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.27 ± 0.01 | - -build: a14b960bc (7816) diff --git 
a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 6b3826e..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1073.10 ± 11.76 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index c8a6856..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.02 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.04 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 3e19bb9..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1075.09 ± 15.15 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.72 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index d3cb0bc..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.43 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.02 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index efb6752..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 904.96 ± 12.42 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.50 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 1446b4a..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.93 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.07 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 2b1dc75..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 906.79 ± 8.48 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.55 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6e97883..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.87 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.05 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log deleted file mode 100644 index 70d5d5b..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 999.37 ± 14.29 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.24 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index e11c921..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.76 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.57 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log deleted file 
mode 100644 index f3ee322..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1004.04 ± 12.55 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.32 ± 0.04 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index fa388a8..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 168.58 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.68 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 01df500..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1180.84 ± 8.60 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.24 ± 0.04 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 00d42b3..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.45 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.82 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 7e7199a..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1046.73 ± 6.25 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 68.71 ± 0.14 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index edb34ce..0000000 --- a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 109.86 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 30.94 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log deleted file mode 100644 index 06add3d..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1207.32 ± 7.42 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.51 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 3022aa0..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 161.97 ± 3.98 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.41 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 101bd0a..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1096.05 ± 129.46 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.57 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 54589c8..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 160.95 ± 3.41 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.37 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log deleted file mode 100644 
index a7ccbd8..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1240.19 ± 1.93 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.06 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 4178bdc..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 286.57 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.33 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 096ba0e..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1246.06 ± 12.57 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.95 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index ae4c172..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 211.86 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.37 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log deleted file mode 100644 index 1f7c7dd..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.75 ± 5.62 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.54 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index ac7f41b..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.98 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.13 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 243d803..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1228.38 ± 14.75 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.53 ± 0.03 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 097c8cf..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.67 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.01 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log deleted file mode 100644 index 4051986..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1224.80 ± 12.64 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.07 ± 0.04 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 23ad554..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 171.30 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.02 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index ede00e6..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1228.44 ± 13.05 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.99 ± 0.03 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index e3dd511..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 172.85 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.05 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log deleted file mode 100644 index df1dc18..0000000 --- 
a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1072.21 ± 149.58 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 1.52 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 8a11d0b..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.87 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.45 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log 
b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log deleted file mode 100644 index 2539f39..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1075.31 ± 42.44 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 86.69 ± 0.11 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 6079ff7..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.06 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 33.81 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index fd2dcb3..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 211.96 ± 2.48 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.74 ± 0.49 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index e9297f0..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 239.82 ± 0.51 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 24.76 ± 1.87 | - -build: a14b960bc (7816) diff --git 
a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index fe3532d..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 262.57 ± 3.77 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.94 ± 0.05 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index d6b3f2b..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 295.41 ± 0.37 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.85 ± 3.58 | - -build: a14b960bc (7816) diff --git 
a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 1e7106e..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 587.41 ± 3.59 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.12 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index f0ae3e2..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 421.06 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.55 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 41593f2..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 610.91 ± 4.82 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.22 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 27251b7..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 432.47 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.56 ± 0.00 | - -build: 9c142e3a2 
(7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 0b88bef..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 354.31 ± 5.52 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.40 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 2b3ed00..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 327.85 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.06 ± 0.00 | - -build: 
9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 3ea4bf5..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 364.74 ± 5.05 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.38 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index f3c68f2..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 340.53 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 
2048 | 1 | 0 | tg32 @ d32768 | 26.05 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index e3d43ae..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 586.56 ± 7.78 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.80 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 2d23fe7..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 403.28 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ 
d32768 | 25.53 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 058efe5..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 609.81 ± 7.04 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.84 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index ae25dd6..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 411.40 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 
| 1 | 0 | tg32 @ d32768 | 25.54 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index c0635f7..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 634.07 ± 4.20 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 33.94 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 9e31a3e..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| 
qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 121.89 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 22.94 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index a95a432..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 538.47 ± 29.53 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 31.56 ± 0.11 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index c355291..0000000 --- a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | 
test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 211.76 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 27.44 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index a2749f2..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 323.22 ± 0.21 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 9f14bc9..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 231.95 ± 3.74 | -| gemma3 
12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 5cc6d4b..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 324.04 ± 0.14 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index d9d363a..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 230.54 ± 3.36 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log 
b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index ca8d844..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 931.79 ± 1.30 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index f18870e..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 247.33 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.61 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 7f55d20..0000000 --- 
a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 936.67 ± 1.30 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3566962..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 259.06 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index c78e668..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 428.84 ± 1.18 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.24 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 5337922..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 275.24 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 7e1d970..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 431.06 ± 0.61 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.24 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 989d957..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 283.40 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log deleted file mode 100644 index 8c947c9..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 926.89 ± 0.25 
| -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.25 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index a688c0f..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 262.69 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.66 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index a9ef8e3..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 934.30 ± 0.96 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3a0e24a..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 254.81 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.67 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 1c332ff..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 602.68 ± 80.42 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.56 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index aa3f7c3..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.40 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.97 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 8bb191e..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 548.18 ± 1.59 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.94 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 7cec059..0000000 --- a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 231.70 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.11 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index e6e1d62..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 430.73 ± 1.02 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 3.86 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log 
b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index f7bcf1c..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 193.46 ± 0.50 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.69 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 02273a2..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 525.55 ± 1.67 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 76656be..0000000 --- 
a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 202.49 ± 3.52 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 24ad07c..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.15 ± 0.56 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index a72de4d..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: 
Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.56 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 302fb73..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 531.41 ± 1.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index d754100..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap 
| test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 214.27 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 1c6842e..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 470.21 ± 1.24 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index c14e74b..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 179.14 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index efdae4b..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 526.32 ± 1.23 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index a4539a4..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 
99 | 2048 | 1 | 0 | pp2048 @ d32768 | 193.22 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index e9c1e83..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 467.63 ± 1.25 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index d71116c..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.27 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | - -build: 
9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index ffbc638..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 527.37 ± 1.47 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index d9ea532..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.00 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log 
b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 68f29ba..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,2 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index ef8b29d..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 111.81 ± 20.34 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.85 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index e6b8559..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | 
ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 73.77 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.40 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index e1a2ff4..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.40 ± 0.65 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.92 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index f1e6c00..0000000 --- a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend 
| ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.09 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.67 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log deleted file mode 100644 index 83da529..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2858.32 ± 17.99 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.57 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 5def9e8..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1182.57 ± 31.53 | -| gemma3 4B Q3_K - 
Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.59 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 455b875..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2679.11 ± 228.92 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 70.08 ± 4.20 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index ebaaee2..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1138.90 ± 19.11 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.59 ± 0.04 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log 
b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log deleted file mode 100644 index c102746..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2884.56 ± 5.24 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.80 ± 0.03 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index eeeb59c..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1446.85 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.42 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 2a53d30..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ 
/dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.72 ± 3.55 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.97 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index a78c350..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1258.46 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.59 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log deleted file mode 100644 index e5cb703..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | 
mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2829.05 ± 14.01 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 82.17 ± 4.20 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 421c142..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1118.35 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 61.04 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index e8c43cb..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K 
- Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2843.10 ± 21.02 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.76 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 143a699..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1123.24 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 61.04 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log deleted file mode 100644 index 8431abc..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2657.05 ± 331.16 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 62.00 ± 3.43 | - -build: 9c142e3a2 
(7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 9fb395a..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1124.76 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.78 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index dbf2273..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2818.77 ± 65.80 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 65.67 ± 4.85 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log 
deleted file mode 100644 index cc2947e..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1136.22 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.85 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log deleted file mode 100644 index 6dcd777..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1514.96 ± 340.21 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 81.61 ± 2.29 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index a3d57b9..0000000 --- 
a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 188.74 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 66.83 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log deleted file mode 100644 index ac9a8fa..0000000 --- a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1235.50 ± 244.41 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 81.02 ± 2.09 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 56ae086..0000000 --- 
a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 694.43 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.80 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log deleted file mode 100644 index 2299f09..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.34 ± 1.51 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.25 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 6275774..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 
+0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.76 ± 0.23 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.60 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index c90c1af..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.33 ± 1.42 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.27 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index c7a144b..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | 
backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 215.89 ± 4.16 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.64 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 3553f04..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 683.09 ± 7.89 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.50 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 64eaca1..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 334.72 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.07 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 78c2f6b..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 684.21 ± 8.30 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.99 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6b62e44..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 
99 | 2048 | 1 | 0 | pp2048 @ d32768 | 333.73 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.14 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index a0cb708..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 674.34 ± 3.87 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.10 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 5cd16ba..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 352.35 ± 0.56 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 
40.44 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 8898523..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 679.62 ± 5.41 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.11 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 75ba15a..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 352.62 ± 1.18 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.46 ± 0.03 | - -build: e0c93af2a (7938) diff --git 
a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log deleted file mode 100644 index b917599..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 675.95 ± 6.90 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.15 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 17ea910..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 257.37 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.80 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log 
b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 83c349e..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 667.48 ± 9.09 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.15 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index f3f74b8..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 255.32 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.87 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log deleted file 
mode 100644 index 5b42185..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 862.37 ± 1.02 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 5.59 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 2ba5783..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 183.43 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.21 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log 
b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 9676853..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 585.93 ± 27.57 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 57.18 ± 0.07 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index de517e6..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 160.38 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 38.70 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git 
a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log deleted file mode 100644 index 4b54d27..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 548.07 ± 6.87 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.66 ± 0.02 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index ae41eeb..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 337.62 ± 3.36 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.54 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 9aea9e3..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ 
-1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 546.89 ± 7.03 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.57 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9029d6a..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 337.07 ± 4.50 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.54 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log deleted file mode 100644 index 872dcfb..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | 
--: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1776.02 ± 15.96 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.43 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 409ef11..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 596.09 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.20 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index a92dd8c..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1777.68 ± 17.27 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | 
tg128 | 73.45 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3d98873..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 546.37 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.26 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log deleted file mode 100644 index e0b5cee..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1734.91 ± 19.75 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.48 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log 
b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index e5b1d94..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 557.68 ± 0.88 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.80 ± 0.01 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 5330505..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1709.32 ± 28.51 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.53 ± 0.00 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 52dd5fe..0000000 --- 
a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 541.93 ± 1.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.84 ± 0.02 | - -build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log deleted file mode 100644 index ecfd557..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.76 ± 45.80 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.34 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index c21ca46..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params 
| backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 420.14 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 55.52 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 4a79534..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1732.05 ± 23.11 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.34 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index d853d82..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| 
gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 417.44 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 55.34 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log deleted file mode 100644 index ca81fc1..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1771.72 ± 240.97 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 7.95 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index a764fdc..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B 
| Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 294.23 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.42 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log deleted file mode 100644 index 65dad4f..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1429.10 ± 24.10 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 80.56 ± 0.18 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index b18d7b4..0000000 --- a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 284.79 ± 0.00 | -| gpt-oss 20B 
MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 56.04 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log deleted file mode 100644 index 4834219..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.23 ± 0.33 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.62 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index fa012c7..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 139.39 ± 0.75 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.59 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 58b1ece..0000000 --- 
a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.22 ± 0.08 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.67 ± 0.01 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1f2b6d2..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 137.30 ± 1.39 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | - -build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log deleted file mode 100644 index 28fa54f..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1595.85 ± 4.24 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.05 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index eabd52b..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.69 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 97ed387..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1599.42 ± 4.42 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.06 ± 0.00 | - -build: 
9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8b942a9..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 187.77 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log deleted file mode 100644 index 0008484..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.91 ± 0.33 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.72 ± 0.02 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 724fd52..0000000 --- 
a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 147.45 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 55d9338..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.59 ± 0.34 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.64 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index d7a864a..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, 
Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.15 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1.log deleted file mode 100644 index acd5e4e..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1578.33 ± 5.18 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.91 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log deleted file mode 100644 index 2bd4e89..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 
| 0 | pp2048 @ d32768 | 165.75 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.79 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 724b4d9..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1587.41 ± 1.76 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.89 ± 0.01 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 92c1781..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.58 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.78 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log 
b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log deleted file mode 100644 index 7655db3..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1261.90 ± 215.63 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 6.26 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 5a169c2..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 104.76 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 4.00 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log deleted file mode 100644 index 
5548853..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1126.16 ± 2.34 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.42 ± 0.08 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 697ab8d..0000000 --- a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 162.11 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.74 ± 0.00 | - -build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/system_info.json b/benchmark/results/09-01-2026/system_info.json deleted file mode 100644 index d29bc7f..0000000 --- a/benchmark/results/09-01-2026/system_info.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "distro": "Fedora Linux 42 (Workstation Edition)", - "kernel": 
"6.18.3-100.fc42.x86_64", - "linux_firmware": "linux-firmware-20251111-1.fc42.noarch", - "timestamp": "09 Jan 2026" -} \ No newline at end of file diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 3cebd78..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 247.81 ± 0.75 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.45 ± 0.27 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 37f9fc9..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 37.61 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.66 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 7ac8d22..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 246.64 ± 0.87 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.63 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 285f8a6..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave 
Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 37.54 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fbe5a1d45a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fbe5a1d496b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fbe5a1d4aef] -/usr/local/lib64/libggml-hip.so.0(+0x2cb1972) [0x7fbe5cf42972] -/usr/local/lib64/libggml-hip.so.0(+0x2cb6b0e) [0x7fbe5cf47b0e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fbe5a1ebe5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fbe5d63eab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7fbe59b6a5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fbe59b6a668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log deleted file mode 100644 index b28b43c..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 250.33 ± 0.67 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.70 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 133d093..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | 
ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 84.50 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fa4112eb5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa4112eb96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fa4112ebaef] -/usr/local/lib64/libggml-hip.so.0(+0x2d5a8e2) [0x7fa4141028e2] -/usr/local/lib64/libggml-hip.so.0(+0x2d5fa7e) [0x7fa414107a7e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fa411302e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fa4147d3ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7fa410c815b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa410c81668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index f6b1cab..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 250.13 ± 0.62 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.71 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 0e4b151..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f3b59a565a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3b59a5696b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3b59a56aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d5a8e2) [0x7f3b5c86d8e2] -/usr/local/lib64/libggml-hip.so.0(+0x2d5fa7e) [0x7f3b5c872a7e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3b59a6de5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3b5cf3eab0] -/usr/local/bin/llama-bench() [0x40adbc] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7f3b593ec5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3b593ec668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index b9717bd..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.74 ± 2.03 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index f90aec0..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - 
Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 33.80 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x107a8d10) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index a4a0e10..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.13 ± 0.85 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.73 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index e66c0c4..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | 
n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 33.91 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x1f16bd10) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index f8bb9fc..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 333.45 ± 1.70 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.33 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index a9adb30..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm 
devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 98.64 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.16 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 1052ad9..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 336.20 ± 2.04 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.77 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 8226b0c..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null 
@@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 98.44 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.88 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index c2a507e..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.36 ± 0.16 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.68 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 
54e1c24..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 47.07 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f6af45f15a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f6af45f196b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f6af45f1aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f6af734e682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f6af735385e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f6af4608e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f6af7a23ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7f6af3f875b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f6af3f87668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7.1.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 63f3a20..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.91 ± 1.10 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.68 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 02f852c..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.62 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f95789005a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f957890096b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f9578900aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f957b65d682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f957b66285e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f9578917e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f957bd32ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7f95782965b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f9578296668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7.1.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index 555b577..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.90 ± 1.42 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | - -build: 
2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index e9e7670..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Hip error: 'an illegal memory access was encountered'(700) at /longer_pathname_so_that_rpms_can_support_packaging_the_debug_info_for_all_os_profiles/src/rocm-libraries/projects/hipblaslt/library/src/amd_detail/hipblaslt.cpp:147 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -✖ ! 
[rocm7.1.1] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 7a33710..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 329.23 ± 1.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 849d02b..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | 
-/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fb26dd2a5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb26dd2a96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fb26dd2aaef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7fb270b325f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7fb270b377ce] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fb26dd41e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fb271232ab0] -/usr/local/bin/llama-bench() [0x40adbc] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7fb26d6c05b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb26d6c0668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7.1.1] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 92f1c15..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.77 ± 1.72 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.70 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 3a80892..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.38 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7ffa533f15a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ffa533f196b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7ffa533f1aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7ffa5614e682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7ffa5615385e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7ffa53408e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7ffa56823ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7ffa52d875b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7ffa52d87668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 1d3fff3..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.19 ± 0.84 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.69 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 568e806..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| 
glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.51 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7ff4771b65a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ff4771b696b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7ff4771b6aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7ff479f13682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7ff479f1885e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7ff4771cde5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7ff47a5e8ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7ff476b4c5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7ff476b4c668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 22ca86f..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.87 ± 0.79 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.59 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 508ce88..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,17 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -:0:rocdevice.cpp :3582: 48997963017 us: Callback: Queue 0x7ff041800000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 -Hip error: 'an illegal memory access was encountered'(700) at /therock/src/rocm-libraries/projects/hipblaslt/library/src/amd_detail/hipblaslt.cpp:147 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -Kernel Name: _ZL15flash_attn_tileILi128ELi128ELi16ELi4ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil -VGPU=0xe715690 SWq=0x7ff143a14000, HWq=0x7ff041800000, id=3 - Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0 - grid=[4096, 8, 24], workgroup=[32, 8, 1] - private_seg_size=0, group_seg_size=33792 - kernel_obj=0x7fdfbe030100, kernarg_address=0x0x7ff040801600 - completion_signal=0x0, correlation_id=0 - rptr=15, wptr=47 - ✖ ! 
[rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 4a13b64..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.19 ± 0.73 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.82 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index bf772f6..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm 
error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f126437b5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f126437b96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f126437baef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f12671835f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7f12671887ce] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1264392e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f1267858ab0] -/usr/local/bin/llama-bench() [0x40adbc] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7f1263d115b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1263d11668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 504b7db..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 228.89 ± 0.52 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.48 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 0483c52..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 40.49 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 9.30 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 3653d58..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 243.57 ± 0.43 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.54 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index d1590a2..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 52.62 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 14.35 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 3c85cf5..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 194.43 ± 0.27 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B 
| ROCm | 99 | 1 | 0 | tg128 | 16.65 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index bd8de34..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 36.61 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f4fa9af05a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4fa9af096b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f4fa9af0aef] -/usr/local/lib64/libggml-hip.so.0(+0x2cb1972) [0x7f4fac85e972] -/usr/local/lib64/libggml-hip.so.0(+0x2cb6b0e) [0x7f4fac863b0e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f4fa9b07e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f4facf5aab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7f4fa94865b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4fa9486668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index c437cd4..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 195.23 ± 0.26 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.64 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 2e0cf96..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 36.83 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.40 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log deleted file mode 100644 index fae0037..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 195.45 ± 0.65 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.50 ± 0.31 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 3ebab6a..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,4 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log 
b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index c56811e..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 195.71 ± 0.70 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 3ebab6a..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,4 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index f984551..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
-ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 275.04 ± 0.75 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.57 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 942f525..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 33.70 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x9cb5d10) reason :GPU Hang -✖ ! 
[rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 8422388..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 272.75 ± 1.25 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index b9c5f49..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 
106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 33.85 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x2738fd10) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 45ffea3..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 277.38 ± 0.34 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.52 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index 6b49d27..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: 
| ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 92.73 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 11.12 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index c29adb3..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 277.33 ± 0.75 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.62 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 226091a..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | 
n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 92.73 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x3c5c0d10) reason :GPU Hang -✖ ! [rocm6_4_4] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index ab6d7f5..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 254.32 ± 0.84 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.51 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 594c729..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 
1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.17 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f75321e15a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f75321e196b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f75321e1aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f7534f3e682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f7534f4385e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f75321f8e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f7535613ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7f7531b775b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f7531b77668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7.1.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index bb9476b..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 253.04 ± 1.12 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.50 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 2312744..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| 
glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.53 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f042c8285a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f042c82896b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f042c828aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f042f585682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f042f58a85e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f042c83fe5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f042fc5aab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7f042c1be5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f042c1be668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7.1.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log deleted file mode 100644 index ded8528..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 257.70 ± 0.50 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.59 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index d557084..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fca6c0ef5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fca6c0ef96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fca6c0efaef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7fca6eef75f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7fca6eefc7ce] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7dd) [0x7fca6c10a46d] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fca6f5f87e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fca6f5fa2b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fca6f5ff6ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fca6f6004fe] -/usr/local/bin/llama-bench() [0x40ad9b] -/usr/local/bin/llama-bench() [0x408a57] -/lib64/libc.so.6(+0x35b5) [0x7fca6ba855b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fca6ba85668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7.1.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 499a6d6..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 259.40 ± 0.46 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.61 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 4e96acb..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error 
-/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f4b572795a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4b5727996b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f4b57279aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f4b5a0815f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7f4b5a0867ce] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f4b57290e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f4b5a781ab0] -/usr/local/bin/llama-bench() [0x40adbc] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7f4b56c0f5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4b56c0f668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7.1.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 080c5ce..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 254.22 ± 1.28 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.50 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 4e835eb..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 45.90 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fc72ee915a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fc72ee9196b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fc72ee91aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7fc731bee682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7fc731bf385e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fc72eea8e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fc7322c3ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7fc72e8275b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fc72e827668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 65abdf8..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 253.25 ± 1.33 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.53 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 34c164b..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B 
Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 45.93 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fc83c7145a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fc83c71496b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fc83c714aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7fc83f471682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7fc83f47685e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fc83c72be5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fc83fb46ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7fc83c0aa5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fc83c0aa668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log deleted file mode 100644 index 522f67c..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 258.89 ± 0.25 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.54 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 32379f8..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 79.91 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.08 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index c5c6987..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,27 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f48fca035a5] 
-/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f48fca0396b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f48fca03aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f48ff80b5f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d5fe47) [0x7f48ff81fe47] -/usr/local/lib64/libggml-hip.so.0(_Z19ggml_cuda_mul_mat_qR25ggml_backend_cuda_contextPK11ggml_tensorS3_S3_PS1_+0x7d3) [0x7f48ff98aba3] -/usr/local/lib64/libggml-hip.so.0(+0x2d5802c) [0x7f48ff81802c] -/usr/local/lib64/libggml-hip.so.0(+0x2d53e28) [0x7f48ff813e28] -/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7f48ff81083f] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f48fca1e483] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f48ffee17e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f48ffee32b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f48ffee86ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f48ffee94fe] -/usr/local/bin/llama-bench() [0x40ad9b] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7f48fc3995b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f48fc399668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index 786a2a8..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fb299bf75a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb299bf796b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fb299bf7aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7fb29c9ff5f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7fb29ca047ce] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fb299c0ee5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fb29d0d4ab0] -/usr/local/bin/llama-bench() [0x40adbc] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7fb29958d5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb29958d668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index da28589..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 279.25 ± 0.28 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.61 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index e54f589..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | 
pp2048 @ d16384 | 42.15 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 7.96 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 8a162f3..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 244.36 ± 0.45 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.73 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 512278a..0000000 --- a/benchmark/results/20-12-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B 
Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 54.92 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 11.62 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index d927b34..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 65.74 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 811a47d..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 23.88 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.52 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index c040916..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 65.41 ± 0.02 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 2466e1d..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
-ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 24.05 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.52 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log deleted file mode 100644 index 4abdd5e..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 65.85 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 8007afd..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 
+0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 37.47 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.61 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 19f1cc0..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 65.38 ± 0.05 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 33e4c02..0000000 --- 
a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 37.86 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.51 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index de26d53..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.84 ± 0.07 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 8b47c40..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 26.43 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.85 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index bb0fc76..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 144.36 ± 0.18 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: 
2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 1022078..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 26.46 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x31c9cd10) reason :GPU Hang -✖ ! 
[rocm6_4_4-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 03f0f5b..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.01 ± 0.05 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index 3d5ade1..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 
70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 56.24 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.61 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 0ff9545..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.28 ± 0.12 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 9e418ec..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | 
---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 56.12 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.60 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index 6224321..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.01 ± 0.05 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 57c1117..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 
32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 35.23 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f2519e175a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f2519e1796b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f2519e17aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f251cb74682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f251cb7985e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f2519e2ee5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f251d249ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7f25197ad5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f25197ad668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7.1.1-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index f6db3a2..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 143.94 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index abd30e6..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | 
---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 34.82 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.86 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index cc7a19c..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.07 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 2ac9624..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f3d030795a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3d0307996b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3d03079aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f3d05e815f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7f3d05e867ce] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3d03090e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3d06581ab0] -/usr/local/bin/llama-bench() [0x40adbc] -/usr/local/bin/llama-bench() [0x408b3d] -/lib64/libc.so.6(+0x35b5) [0x7f3d02a0f5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3d02a0f668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7.1.1] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 9b4a794..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.12 ± 0.04 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | 
ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 22906b2..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 52.68 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.50 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index a0078fb..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| 
llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.84 ± 0.08 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index f6357ab..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 35.12 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.85 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 94c795b..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | 
t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 143.47 ± 0.06 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index a58a6dd..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 35.06 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.82 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 4598534..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD 
Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.96 ± 0.08 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 25b568d..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,28 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f357c5865a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f357c58696b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f357c586aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f357f38e5f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d5fe47) [0x7f357f3a2e47] -/usr/local/lib64/libggml-hip.so.0(+0x2d5e41c) [0x7f357f3a141c] -/usr/local/lib64/libggml-hip.so.0(+0x2d5d5e7) [0x7f357f3a05e7] -/usr/local/lib64/libggml-hip.so.0(+0x2d580cb) [0x7f357f39b0cb] -/usr/local/lib64/libggml-hip.so.0(+0x2d53e28) [0x7f357f396e28] 
-/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7f357f39383f] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f357c5a1483] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f357fa647e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f357fa662b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f357fa6b6ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f357fa6c4fe] -/usr/local/bin/llama-bench() [0x40ad9b] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7f357bf1c5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f357bf1c668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 4d9e8ad..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.24 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index 4b7c8ce..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 52.70 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.57 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 3c6c07c..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.73 ± 0.26 | 
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 22f5b61..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 30.75 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 2.46 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 022269b..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: 
| -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 88.29 ± 0.76 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index fafeab9..0000000 --- a/benchmark/results/20-12-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 31.75 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 2.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 0a7e1b1..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | 
--: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 788.05 ± 2.61 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index ac3d66f..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 139.54 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 5.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 2a91c3d..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | 
mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 787.49 ± 3.69 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 0ae20f9..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 136.23 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 5.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log deleted file mode 100644 index f2bbffd..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), 
VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 799.32 ± 2.30 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 99f7301..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 273.53 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.73 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 5146b25..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 
0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 804.49 ± 4.05 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 91170dc..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 282.94 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.73 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index dcb50e9..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: 
GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 792.57 ± 2.08 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.47 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 6d3e55b..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 121.02 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 8a899fd..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log +++ 
/dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 794.96 ± 3.40 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.47 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 65b21b6..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 118.46 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log deleted file mode 100644 index 176af12..0000000 --- 
a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 802.78 ± 0.92 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index 758716e..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 289.71 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.73 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 
index 8016de9..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 810.15 ± 2.26 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index e1046e5..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 316.13 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.73 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log 
b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index 2eb1c65..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 781.49 ± 1.79 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 4914b28..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 176.40 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.84 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 916ac50..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 795.45 ± 1.95 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 47f5829..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 179.22 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 
13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.84 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log deleted file mode 100644 index 23a623e..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 797.09 ± 3.94 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 4cb4f21..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 261.91 ± 0.00 | -| mistral3 14B BF16 | 
25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 036342e..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 800.44 ± 2.67 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 65bfaf3..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | 
pp2048 @ d16384 | 282.67 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 5b64204..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 794.13 ± 2.11 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index a3fd4c8..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 
25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 177.29 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.84 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index c886bbe..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 757.71 ± 0.34 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index f50d2fb..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | 
--: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 175.04 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.83 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log deleted file mode 100644 index 7395dcc..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 803.93 ± 1.68 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 1ba6a51..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | 
--: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 266.48 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index e0dc575..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 767.62 ± 3.68 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index ba3c89c..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 280.71 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log deleted file mode 100644 index 8803b27..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 228.96 ± 0.25 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.20 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 7d406bf..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | 
params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 101.86 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 6.76 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log deleted file mode 100644 index 7712766..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 198.30 ± 1.06 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.57 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index b4d60c2..0000000 --- a/benchmark/results/20-12-2025/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| 
model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 98.61 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 6.84 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index b935b7e..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 575.90 ± 0.60 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.67 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 04b7b08..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: 
Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 815.96 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 37.94 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 59f12dc..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 566.25 ± 5.65 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.75 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index f2ecfc7..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ 
/dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 818.18 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 37.96 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log deleted file mode 100644 index 583fbf1..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 573.96 ± 1.31 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.73 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 93a9c1f..0000000 --- 
a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 840.39 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.57 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 38ae77f..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 568.52 ± 6.46 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.75 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 69cc8c3..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 842.90 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.55 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 5e2df6e..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1111.52 ± 3.84 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.23 ± 0.00 | - -build: 2aa45ef9e (7423) diff 
--git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 1b85141..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1161.59 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.22 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index f7bf3a0..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1075.82 ± 2.72 | 
-| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.19 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 69b8f82..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1159.76 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.28 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 5ce08c0..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1111.90 ± 4.65 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.13 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index 857e033..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1083.84 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.84 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index e818cc0..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1074.40 ± 7.61 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.17 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 00c9009..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1126.17 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.77 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index 248c243..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1110.04 ± 2.67 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.43 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 1b323f7..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1034.85 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.58 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index c6ab5d1..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1073.92 ± 6.88 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.52 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 04dbbae..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1041.85 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.59 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log deleted file mode 100644 index 31d6990..0000000 --- 
a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1119.24 ± 8.14 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.47 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 749e116..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 964.26 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.21 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log 
b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 6178218..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1086.57 ± 5.04 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.44 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 61cd367..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 972.22 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.23 ± 0.00 | - -build: 2aa45ef9e (7423) 
diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 8e0530d..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1100.11 ± 6.03 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.51 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 0d652a5..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1036.83 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 
GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.63 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 20d0bbe..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1066.41 ± 6.56 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.45 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 6a5e27e..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 
31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1030.17 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log deleted file mode 100644 index 6d6d161..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1110.36 ± 6.45 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.23 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index d566191..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1042.68 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.00 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 9edc424..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1074.73 ± 4.78 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.51 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index a140be0..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1001.21 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.21 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index dbd7200..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1224.54 ± 7.71 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 46.56 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 31888ea..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 
1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 569.03 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 39.60 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 08568b4..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 940.69 ± 5.60 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 45.38 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index f7023ef..0000000 --- a/benchmark/results/20-12-2025/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 
| bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 489.74 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 42.30 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 1c6bafc..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 195.33 ± 3.19 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.05 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index a42d7d6..0000000 --- 
a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 73.22 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f12702305a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f127023096b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1270230aef] -/usr/local/lib64/libggml-hip.so.0(+0x2cb1972) [0x7f1272f9e972] -/usr/local/lib64/libggml-hip.so.0(+0x2cb6b0e) [0x7f1272fa3b0e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1270247e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f127369aab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7f126fbc65b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f126fbc6668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 04baa81..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 195.28 ± 1.09 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.09 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 871b575..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | 
t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 72.86 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fd4172615a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fd41726196b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fd417261aef] -/usr/local/lib64/libggml-hip.so.0(+0x2cb1972) [0x7fd419fcf972] -/usr/local/lib64/libggml-hip.so.0(+0x2cb6b0e) [0x7fd419fd4b0e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fd417278e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fd41a6cbab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7fd416bf75b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fd416bf7668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log deleted file mode 100644 index 57846b1..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 194.82 ± 1.18 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.06 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index c5d2182..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | 
--: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 63.02 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.73 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 466d7a6..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 196.80 ± 1.26 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.01 ± 0.07 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 3220457..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
-ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 65.07 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 11.83 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 3d604c2..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 198.21 ± 1.42 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index b607ba2..0000000 
--- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 66.78 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x3bf1cd10) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 934a790..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.49 ± 1.92 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.01 | - -build: 2aa45ef9e (7423) diff 
--git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index a8ac7a1..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 69.68 ± 0.00 | -Memory access fault by GPU node-1 (Agent handle: 0x168dcd10) on address 0x7fc71a789000. Reason: Page not present or supervisor privilege. -:0:rocdevice.cpp :2992: 91563147890 us: Callback: Queue 0x7fe05ae00000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 -✖ ! 
[rocm6_4_4-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index bb2c5e9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 205.39 ± 1.95 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.83 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index 8924df7..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x35d3ed10) reason :GPU Hang -✖ ! 
[rocm6_4_4] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 287edc5..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 201.21 ± 1.57 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.97 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 164727d..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1c519d10) on address 0x7eff38303000. Reason: Page not present or supervisor privilege. -✖ ! 
[rocm6_4_4] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index 90e4554..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.07 ± 0.88 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 564eb79..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | 
---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 65.17 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 5.26 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 3d9aad0..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 195.14 ± 1.06 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.08 ± 0.05 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 960c254..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: 
no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 64.99 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fc8c7bfb5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fc8c7bfb96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fc8c7bfbaef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7fc8ca958682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7fc8ca95d85e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fc8c7c12e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fc8cb02dab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7fc8c75915b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fc8c7591668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7.1.1-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log deleted file mode 100644 index aee2b6f..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.48 ± 2.40 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.91 ± 0.14 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 6d4a794..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,27 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | 
-: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7ff4276395a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ff42763996b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7ff427639aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7ff42a4415f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d5f0bb) [0x7ff42a4550bb] -/usr/local/lib64/libggml-hip.so.0(+0x2d5d5e7) [0x7ff42a4535e7] -/usr/local/lib64/libggml-hip.so.0(+0x2d580cb) [0x7ff42a44e0cb] -/usr/local/lib64/libggml-hip.so.0(+0x2d53e28) [0x7ff42a449e28] -/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7ff42a44683f] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7ff427654483] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7ff42ab427e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7ff42ab442b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7ff42ab496ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7ff42ab4a4fe] -/usr/local/bin/llama-bench() [0x40ad9b] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7ff426fcf5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7ff426fcf668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7.1.1] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 59896b1..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f75f10bf5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f75f10bf96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f75f10bfaef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f75f3ec75f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d55ff5) [0x7f75f3ed1ff5] -/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7f75f3ecc83f] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f75f10da483] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f75f45c87e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f75f45ca2b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f75f45cf6ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f75f45d04fe] -/usr/local/bin/llama-bench() [0x40ad9b] 
-/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7f75f0a555b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f75f0a55668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! [rocm7.1.1] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 3ebab6a..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,4 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 04307a9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.33 ± 0.72 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.13 
± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 977deb3..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 62.78 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7efc563255a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7efc5632596b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7efc56325aef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7efc59082682] -/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7efc5908785e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7efc5633ce5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7efc59757ab0] -/usr/local/bin/llama-bench() [0x408c12] -/lib64/libc.so.6(+0x35b5) [0x7efc55cbb5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7efc55cbb668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 2fd430d..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.34 ± 1.19 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.06 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index c1ee622..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 62.68 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 5.35 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log deleted file mode 100644 index bb619c0..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 201.37 ± 1.76 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.91 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 0e71630..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
-ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fb6064785a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb60647896b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fb606478aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7fb6092805f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d55ff5) [0x7fb60928aff5] -/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7fb60928583f] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fb606493483] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fb6099567e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fb6099582b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fb60995d6ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fb60995e4fe] -/usr/local/bin/llama-bench() [0x40ad9b] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7fb605e0e5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb605e0e668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index ba426aa..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.51 ± 1.70 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.98 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index 35b0598..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | 
--: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7ff5be8415a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ff5be84196b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7ff5be841aef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7ff5c16495f2] -/usr/local/lib64/libggml-hip.so.0(+0x2d55ff5) [0x7ff5c1653ff5] -/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7ff5c164e83f] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7ff5be85c483] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7ff5c1d1f7e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7ff5c1d212b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7ff5c1d266ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7ff5c1d274fe] -/usr/local/bin/llama-bench() [0x40ad9b] -/usr/local/bin/llama-bench() [0x4088ac] -/lib64/libc.so.6(+0x35b5) [0x7ff5be1d75b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7ff5be1d7668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index 9cfba84..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 145.16 ± 0.17 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.77 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 8419851..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | 
---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 31.17 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 5.72 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 93bee5e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 131.53 ± 1.13 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.08 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index e397722..0000000 --- a/benchmark/results/20-12-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp 
size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 34.22 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 9.66 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 5730ae3..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 489.10 ± 3.76 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 4d63c40..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no 
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 282.60 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.97 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 0f8a52e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 491.47 ± 1.44 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.14 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 95b1cb9..0000000 --- 
a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 271.39 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 14.61 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log deleted file mode 100644 index 71be6fa..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 491.95 ± 0.75 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log 
deleted file mode 100644 index 8240393..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 336.95 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.45 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 86257e9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 492.47 ± 3.40 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.15 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 2f690ca..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 318.73 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.44 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 911023a..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 567.78 ± 2.40 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.92 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 3aa46ab..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 268.54 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.12 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 2b2c080..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 575.99 ± 6.42 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 
30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index bbeaf1a..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 255.99 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.07 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index b35dbc0..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 
0 | pp512 | 576.13 ± 3.25 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.13 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index d6e3121..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x3b630d10) on address (nil). Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_4] Qwen3-30B-A3B-BF16-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index f3428c4..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3cc03d10) reason :GPU Hang -✖ ! 
[rocm6_4_4] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 35fc41e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 377.73 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.29 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index b8f1d36..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | 
pp512 | 460.49 ± 1.91 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.12 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index e5274e9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 234.40 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 17.98 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index ebf1c94..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 467.86 ± 1.23 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 04c685f..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 224.91 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.00 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index 7edcf87..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 468.47 ± 2.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.08 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index eeddc58..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 360.38 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.31 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index d7db6d1..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test 
| t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 470.04 ± 3.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.02 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index d59ba93..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 337.92 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.32 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 8e01011..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 
32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 461.77 ± 2.14 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 3203344..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 234.95 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.08 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 363c223..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 
ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 463.39 ± 4.60 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.15 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 769aa5f..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 223.07 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index d1196f9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no 
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 466.61 ± 1.68 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 28bb187..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 319.20 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.52 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 3140710..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ 
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 473.68 ± 1.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index bf51587..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 337.79 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.34 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 350bb76..0000000 --- 
a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 194.36 ± 0.12 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.96 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index d329a30..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 86.76 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 8.71 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 
100644 index 3fa928a..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 167.29 ± 0.18 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.36 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index c038d21..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 102.90 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 8.57 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index e6192c9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 883.95 ± 6.89 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.73 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 2f33b05..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 323.55 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 20.52 ± 0.00 | - -build: 2aa45ef9e 
(7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 6cc0a0b..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 888.32 ± 6.59 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.55 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 99edf40..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 
@ d16384 | 323.18 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 20.51 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log deleted file mode 100644 index ec5dc7e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 887.98 ± 2.33 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.53 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 486fb05..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 
30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 273.12 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.25 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index afb9e9d..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 897.81 ± 13.27 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.56 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index a6ee119..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | 
---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 273.07 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.23 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 1081a96..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1045.84 ± 8.87 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.04 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index e96d662..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | 
backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 310.27 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.10 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 40a0181..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1046.62 ± 8.31 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 972826c..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: 
no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 313.66 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.11 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 96b85d0..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1065.87 ± 15.74 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.71 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index c0d37cd..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 353.38 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index bc9b8e5..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1056.16 ± 8.88 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.68 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 5639307..0000000 --- 
a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 341.15 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.59 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index 248c0ca..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 961.79 ± 10.60 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.69 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 64780d6..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 263.57 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 27.72 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index a6ced1b..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 964.88 ± 9.02 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.01 | - -build: 2aa45ef9e (7423) 
diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 5318173..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 263.64 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 27.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log deleted file mode 100644 index 8acd7e7..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 992.39 ± 4.30 | -| qwen3moe 30B.A3B 
Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.48 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index c655de8..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 286.94 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.51 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 42d8f1f..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 
30.53 B | ROCm | 99 | 1 | 0 | pp512 | 984.99 ± 7.73 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.39 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index a0ba594..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 284.86 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.29 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 37e54c9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | 
-: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 966.17 ± 7.59 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.85 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 9940008..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 263.45 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 27.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index b07a137..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | 
ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 960.50 ± 8.25 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.91 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index f29e900..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 263.73 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 27.72 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log deleted file mode 100644 index 0898dc9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 986.79 ± 6.92 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.42 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 68652c6..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 410.85 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.33 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 9c58c7b..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: 
no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 984.07 ± 5.87 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.52 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index 29f6134..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 282.74 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.35 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 66e05a7..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ 
-ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1129.76 ± 4.79 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 62.27 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index c56e9f5..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 135.73 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 33.09 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 2b85a69..0000000 --- 
a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 905.18 ± 4.26 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.46 ± 0.05 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 98a3d4b..0000000 --- a/benchmark/results/20-12-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 172.42 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 42.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 769297d..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1189.07 ± 7.02 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.58 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 99786f1..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 332.99 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.83 ± 0.00 | - 
-build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 6ef2d48..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1185.59 ± 10.60 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.65 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 39fd865..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm 
| 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 338.96 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.84 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log deleted file mode 100644 index edf9feb..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1185.88 ± 9.66 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.64 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index eb57e0e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 295.00 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 45.59 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 4baec9f..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1189.90 ± 9.82 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.61 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 8e7e86b..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | 
t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 291.08 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 45.74 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index c00de91..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1201.14 ± 12.83 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.92 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 4a793ea..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 319.92 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.24 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index fd3bded..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1208.02 ± 13.07 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.87 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 37260d9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ 
-1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 321.94 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.28 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log deleted file mode 100644 index ebb7e8e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.68 ± 19.07 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.13 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index 5b810ee..0000000 --- 
a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 387.32 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.53 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index cffdfb9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1231.06 ± 2.02 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.08 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index cd8e169..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 361.60 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 44.08 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index 50347bc..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1172.29 ± 9.77 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.19 ± 0.02 | - -build: 2aa45ef9e (7423) 
diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 1300fbd..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.50 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 29.99 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index a50a1bd..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 
1169.43 ± 4.95 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.31 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index dafa37b..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.83 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log deleted file mode 100644 index 0a23174..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1183.05 ± 9.42 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.17 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 510d0cb..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 292.36 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.95 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 0ecb4c7..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | 
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1195.38 ± 5.88 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.06 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index a0bbd29..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 287.87 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.84 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 71866cb..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1166.57 ± 8.22 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.37 ± 0.05 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index ee0dbbb..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.55 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.01 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 28f33f2..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1167.10 ± 5.32 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.33 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 92ee452..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.39 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.09 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log deleted file mode 100644 index b6cdbaa..0000000 --- 
a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1194.63 ± 7.87 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.98 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 62b9aa2..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 292.47 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.87 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log 
b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index b165442..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1202.35 ± 10.49 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.96 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index 531a084..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 292.54 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 44.02 ± 0.00 | - -build: 2aa45ef9e (7423) 
diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log deleted file mode 100644 index 5d8e040..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 825.86 ± 2.68 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.94 ± 0.04 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 5b95baa..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 130.78 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 
30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 37.08 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log deleted file mode 100644 index 94bec33..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 864.66 ± 2.72 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 83.07 ± 0.04 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 8b6ad3a..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ 
d16384 | 168.69 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 47.63 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 005fd2f..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 345.64 ± 0.84 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.88 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 432cbb5..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 344.40 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 20.67 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 75330e1..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 356.53 ± 3.90 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 27.26 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 88740e8..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 352.86 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 24.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log deleted file mode 100644 index 9b4db1a..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 346.29 ± 1.98 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.92 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index e6be6ea..0000000 --- 
a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 379.41 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.12 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index e2108ce..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 360.22 ± 1.39 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 27.35 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 94e037e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 386.65 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 25.80 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 0b57963..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 
79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 573.57 ± 2.61 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.89 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 8c312b8..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 464.91 ± 0.00 | -Memory access fault by GPU node-1 (Agent handle: 0x260d7d10) on address 0x7efad124d000. Reason: Page not present or supervisor privilege. -✖ ! 
[rocm6_4_4-rocwmma] Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 5dcd687..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 595.88 ± 2.98 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.34 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 10c5d70..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 480.86 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 23.30 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 31e9e1e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 576.31 ± 0.99 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.64 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index 077eb4c..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 
(0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 489.52 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 24.50 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 7269e2e..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 590.68 ± 0.83 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.38 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 6bfdd77..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log +++ 
/dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 503.34 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 25.09 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index b4f68a7..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 571.05 ± 4.21 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.45 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 5bad71a..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 444.72 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.38 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 6a40914..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 594.40 ± 3.02 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 
79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.73 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 8a21a7c..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,33 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 451.46 ± 0.00 | -:0:rocdevice.cpp :3587: 7778304718 us: Callback: Queue 0x7f5274500000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. 
code: 0x29 -Kernel Name: _ZL18flash_attn_ext_vecILi256ELi1EL9ggml_type1ELS0_1ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS6_IjLj3EEiiiiiiiiiiiliiliiiiil -VGPU=0x3fa84a70 SWq=0x7f5276f10000, HWq=0x7f5274500000, id=2 - Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0 - grid=[32, 60, 16], workgroup=[32, 4, 1] - private_seg_size=0, group_seg_size=8448 - kernel_obj=0x7f5275b4f600, kernarg_address=0x0x7f3d88d43980 - completion_signal=0x0, correlation_id=0 - rptr=813327, wptr=816270 - /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f52b67fb5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f52b67fb96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f52b67fbaef] -/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f52b9558682] -/usr/local/lib64/libggml-hip.so.0(+0x2cab085) [0x7f52b9563085] -/usr/local/lib64/libggml-hip.so.0(+0x2ca58cf) [0x7f52b955d8cf] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f52b6816483] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f52b9c2e7e0] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f52b9c302b2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f52b9c356ff] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f52b9c364fe] -/usr/local/bin/llama-bench() [0x408c92] -/lib64/libc.so.6(+0x35b5) [0x7f52b61915b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f52b6191668] -/usr/local/bin/llama-bench() [0x409c25] -✖ ! 
[rocm7.1.1-rocwmma] Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index 5299717..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 568.38 ± 2.63 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.50 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 04895c4..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 462.35 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.01 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 6183815..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 587.50 ± 6.59 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.81 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 1c1bdde..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | 
size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 479.80 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.62 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index eb49689..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 567.13 ± 2.43 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.44 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 6954010..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 438.42 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 23.36 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index c22f1e0..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 594.63 ± 11.03 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.76 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log 
deleted file mode 100644 index 354c810..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 442.41 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 23.66 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index f4855bd..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 570.28 ± 3.77 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.05 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 4d8d15d..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 473.92 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 24.80 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 0d97f79..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 
| pp512 | 588.80 ± 0.73 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index 534ab2b..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 488.38 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.96 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index c79c18c..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 406.08 ± 1.14 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 33.67 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index e8d9bcb..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 194.48 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 27.27 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 45b0ac9..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | 
bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 326.83 ± 0.94 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 30.18 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 320c1bb..0000000 --- a/benchmark/results/20-12-2025/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 220.99 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 27.91 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 361a6d7..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 421.94 ± 0.25 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index ecd2fdf..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 332.05 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 11.86 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index a340223..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ 
-1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 424.57 ± 0.40 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 8d852c2..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 325.94 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 11.85 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log deleted file mode 100644 index d060db9..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log +++ 
/dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 429.89 ± 0.35 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index ba5f705..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 309.80 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.77 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 69b7431..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ 
-1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 430.69 ± 0.83 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.25 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 32c78b5..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 338.46 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.76 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index a2843f4..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log +++ /dev/null 
@@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 897.73 ± 0.63 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.14 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 312778f..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 288.29 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.01 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 5bb70aa..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 904.28 ± 1.51 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 7ded8ad..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 290.79 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.00 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 
235334a..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 913.75 ± 0.60 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.21 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index db48774..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 425.43 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.72 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 184977a..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 919.42 ± 0.44 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 616dc46..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 430.76 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.78 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index bc50843..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 886.59 ± 0.64 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index fc15a41..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 303.06 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.05 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 
328096e..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 890.97 ± 0.75 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 64f9cf7..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 304.61 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log deleted file mode 
100644 index 05f259d..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 910.34 ± 0.61 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 4766a18..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 391.58 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.80 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 9cbc2a8..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 917.22 ± 0.99 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.28 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 58aaf7a..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 394.15 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.77 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index c77a98f..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 882.81 ± 0.63 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.16 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 27debaf..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 302.60 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.02 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 8c5636c..0000000 
--- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 891.69 ± 0.16 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.16 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 828c786..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 306.94 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log deleted file mode 100644 index 522f057..0000000 
--- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 913.03 ± 1.04 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 7f19c44..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 399.02 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.77 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 19c608d..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 917.06 ± 0.24 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.27 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index 5fd8d29..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 391.45 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.78 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 1e29149..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 690.02 ± 1.72 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 411a64f..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 54.64 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 12.98 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 649e0ba..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 549.78 ± 1.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.95 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 266830e..0000000 --- a/benchmark/results/20-12-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 270.10 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 11.42 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 6da9bcd..0000000 --- 
a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.46 ± 1.81 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 8996dbe..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 167.60 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log 
b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index fa35df2..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 522.97 ± 0.37 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index d4ae304..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 170.01 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.56 ± 0.00 | - -build: 2aa45ef9e (7423) diff 
--git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log deleted file mode 100644 index ac992c7..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 477.16 ± 1.86 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 9520a2f..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 237.04 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.87 ± 0.00 | - -build: 
2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 12e2b0a..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 527.33 ± 1.41 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index fdf835b..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.71 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 
| 2048 | 1 | 0 | tg32 @ d16384 | 3.87 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index e164e47..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.30 ± 0.54 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index f9c5383..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 167.49 ± 0.00 | -| 
gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.71 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index fef7449..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 525.67 ± 0.68 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index d70f1e9..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B 
BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 169.41 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.71 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 08ada47..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.62 ± 0.27 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index 0f58777..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 
27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 246.86 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.86 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 59b9670..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 530.96 ± 0.63 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 63ed4a7..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 285.15 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.86 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index cd81a2b..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 463.74 ± 0.73 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 79e38a4..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | 
---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 193.84 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.75 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 3ec22c1..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 522.71 ± 0.55 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index b029e97..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend 
| ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 205.34 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.76 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log deleted file mode 100644 index 4d7ac4f..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.55 ± 0.52 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 1566960..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | 
backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 234.18 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.89 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index d9990e7..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 526.17 ± 0.74 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index b854410..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: 
no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 269.04 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.89 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 8554509..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 462.78 ± 0.04 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 646709f..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD 
Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 194.64 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.76 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index a04ab5b..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.88 ± 0.71 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 5e20aed..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: 
GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 195.11 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.76 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 7e00b6d..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 474.02 ± 0.22 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 176bc97..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: 
GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 308.72 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.85 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 8372beb..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 506.00 ± 0.53 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index ca30c47..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: 
GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 320.96 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.89 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 7efd9cc..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2819260416 failed. -ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' -✖ ! 
[vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index fb6fd18..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -ggml_vulkan: Device memory allocation of size 2819260416 failed. -ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory -main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' -✖ ! 
[vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 6df7e0c..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.99 ± 1.50 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.93 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 8d0bace..0000000 --- a/benchmark/results/20-12-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 67.01 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 
27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 3.76 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index af8f3fc..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2767.54 ± 1.34 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.17 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 825bba8..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1439.14 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 
3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 68.99 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index b5d1673..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2762.69 ± 4.25 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.08 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index c143312..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1442.24 ± 
0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 68.87 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log deleted file mode 100644 index 6e0df52..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2759.74 ± 13.26 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.86 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index c304f88..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1331.68 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB 
| 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 71.38 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index fb3a67c..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2748.02 ± 15.88 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.54 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 54a7f4d..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1341.74 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 
3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 71.34 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 73eee2d..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2907.52 ± 4.15 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.61 ± 0.04 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 26bd649..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1365.96 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | 
ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 66.95 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 069c775..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2902.86 ± 2.84 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.68 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 93d8e93..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1384.06 ± 0.00 | -| gemma3 4B 
Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 66.93 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log deleted file mode 100644 index 8d4790f..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2868.25 ± 16.39 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.93 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index bc9a6e4..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1413.39 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 
| 0 | tg32 @ d16384 | 67.93 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 3d1a62f..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.90 ± 17.97 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.07 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index f3ad530..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1414.92 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ 
d16384 | 68.13 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index bd61453..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2812.03 ± 15.70 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.66 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index f15de69..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1347.24 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 
66.93 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 19e21d1..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2820.50 ± 10.20 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.66 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 45cfba0..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1390.56 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 
| 2048 | 1 | 0 | tg32 @ d16384 | 67.23 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log deleted file mode 100644 index ae8df7d..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2853.13 ± 21.11 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.93 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index e604ec0..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1368.50 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 68.97 ± 0.00 | - 
-build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 88706b8..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2846.23 ± 16.40 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.96 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 7f6b35d..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1361.15 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 67.99 ± 0.00 | - -build: 
2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 988efe2..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2843.92 ± 0.49 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.68 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 0c8d637..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1377.32 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 66.63 ± 0.00 | - -build: 2aa45ef9e (7423) 
diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 0d50d4b..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2841.34 ± 6.05 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.55 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 585ce4f..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1340.85 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 67.21 ± 0.00 | - 
-build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log deleted file mode 100644 index edb6f5d..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2888.98 ± 3.97 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.99 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index d5bf76c..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1300.12 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 65.93 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 6d438b1..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.43 ± 2.44 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.99 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index be4afd5..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1332.98 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 67.59 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log deleted file mode 100644 index b62a85a..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1798.72 ± 4.50 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 90.73 ± 0.09 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 24bb270..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 466.89 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 74.73 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log deleted file mode 100644 index a154a01..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1633.15 ± 4.31 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.91 ± 0.19 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 42fcd8d..0000000 --- a/benchmark/results/20-12-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 776.82 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 58.76 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 2252fb8..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 274.17 ± 2.38 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.85 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index a29ee58..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 303.64 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm 
| 99 | 2048 | 1 | 0 | tg32 @ d16384 | 29.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index ee65108..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 276.82 ± 4.42 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.95 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index fdb5447..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 
GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 303.82 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 29.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log deleted file mode 100644 index 2d33a3b..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 274.29 ± 0.47 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.18 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 48bc097..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 301.57 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.81 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index b27b11c..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 274.47 ± 1.46 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.23 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 46fc918..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 289.74 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.82 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 7a833ac..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 657.63 ± 7.64 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.65 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 5159dd1..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | 
ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 479.02 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.15 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 9ed737f..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 649.55 ± 10.69 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.68 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index b3e1604..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm 
devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 480.59 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.88 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 303a0c0..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 654.41 ± 2.17 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.37 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index a44f435..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 
-ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 470.66 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 45.00 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index aac1650..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 658.64 ± 9.76 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.93 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 4d3497f..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ 
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 472.41 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 45.26 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index dfe030e..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 637.48 ± 24.73 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.66 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 9ce2672..0000000 --- 
a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 412.07 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.91 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index a164176..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 653.60 ± 10.65 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.79 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 315116f..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 409.77 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.99 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log deleted file mode 100644 index 79ce413..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 639.37 ± 5.37 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.13 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index 6d3d42a..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 496.89 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.73 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 0a274b3..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 646.49 ± 8.17 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 
51.16 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index 81ff7b2..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 501.87 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 44.01 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 30be698..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 645.91 ± 13.82 | -| 
gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.67 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index cc45722..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 410.91 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.97 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index ec1ce05..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| 
gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 653.08 ± 2.58 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.78 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 43778f2..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 413.56 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.99 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log deleted file mode 100644 index 8113168..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | 
---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 637.84 ± 10.76 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.22 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 9c34c8d..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 522.65 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.99 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index a93d1d4..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 642.63 ± 7.27 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.17 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index 4db2edc..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 532.69 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 44.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index a1dd94b..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat 
-| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 792.77 ± 0.78 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.34 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 123da16..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 266.30 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 39.24 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 9ffcf99..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | 
int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 543.66 ± 0.88 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.37 ± 0.04 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 3dc745b..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 299.40 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 45.88 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 4326475..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 791.20 ± 5.92 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.24 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 381be35..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 539.70 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.26 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index fc16f14..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 795.34 ± 8.45 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.28 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 7c903ac..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 538.62 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.21 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log deleted file mode 100644 index 093a57c..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 788.07 ± 19.38 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.65 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index 5e1b2bb..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 585.34 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 61.13 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 59cb88e..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 
32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 777.22 ± 12.74 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.68 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 45a310e..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 533.90 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 61.16 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 5486142..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | 
params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1752.24 ± 12.09 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.82 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 5552e25..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 796.69 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.10 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 86bf607..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | 
params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1717.36 ± 12.37 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.94 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 6dcbab1..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 796.57 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.08 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log deleted file mode 100644 index 62ba657..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | 
fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.01 ± 23.22 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.48 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log deleted file mode 100644 index ae120ce..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 808.14 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 64.31 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index e64cdc4..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1726.91 ± 4.81 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.44 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index e298cff..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 817.70 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 64.37 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index 7ff138e..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.28 ± 7.42 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.12 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index e60eec1..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 670.09 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.17 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 29b33c0..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1695.75 ± 25.43 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.15 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 556c411..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 668.65 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log deleted file mode 100644 index 1530264..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1670.49 ± 30.36 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.45 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index e0cabe3..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 658.07 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 62.69 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 00e6a9e..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | 
---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1661.92 ± 6.16 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.41 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index fd78ca6..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 654.36 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 62.59 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 2763926..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | 
-: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1692.63 ± 8.33 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.09 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index a63519e..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 668.58 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.12 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 49df202..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1721.79 ± 15.21 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.01 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 1b523b7..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 665.87 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.13 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log deleted file mode 100644 index fd42fa3..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1668.78 ± 30.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.33 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 6c3bf6a..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 633.71 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 62.64 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 67ac385..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB 
| 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1674.84 ± 21.95 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.29 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index fc067d5..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 644.71 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 62.66 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log deleted file mode 100644 index eff8190..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 
| pp512 | 1965.23 ± 21.66 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 75.24 ± 0.04 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index de60c57..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 512.34 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 56.91 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log deleted file mode 100644 index 297b848..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1395.08 ± 16.05 | -| gpt-oss 20B MXFP4 MoE | 11.27 
GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 79.60 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 214c4a8..0000000 --- a/benchmark/results/20-12-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 467.70 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 65.33 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 71b27ca..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1556.97 ± 0.78 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.65 ± 0.02 | - -build: 2aa45ef9e 
(7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 8b6d29a..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 94.98 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index c8ebbe3..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1561.25 ± 2.77 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.59 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index c0ef94b..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 94.41 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1.log deleted file mode 100644 index 32f721d..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1572.74 ± 2.60 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.65 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git 
a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log deleted file mode 100644 index a5326b0..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 351.26 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.04 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 3bbc163..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1572.88 ± 2.44 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.64 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log deleted file mode 100644 index 05bda17..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 346.35 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.03 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 8bea5c1..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1525.39 ± 0.85 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.05 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log deleted file mode 100644 index eb58868..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 104.91 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.08 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index adc47d5..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1524.22 ± 2.19 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.02 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log 
b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index b41ed3a..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 106.82 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.07 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1.log deleted file mode 100644 index 8074b6d..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1539.28 ± 0.84 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.02 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log deleted file 
mode 100644 index 5adfe23..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 384.07 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.06 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 47a5836..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1538.89 ± 3.35 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.07 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log deleted file mode 100644 index 52dabe0..0000000 --- 
a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 382.87 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.07 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log deleted file mode 100644 index e534712..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1492.67 ± 1.40 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.89 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log deleted file mode 100644 index be3abc4..0000000 --- 
a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 141.66 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.37 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index d3c244e..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1495.58 ± 2.18 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.97 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index f7d0ab4..0000000 --- 
a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 141.05 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.35 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1.log deleted file mode 100644 index 0d4f6c6..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1526.32 ± 2.10 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.96 ± 0.03 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log deleted file mode 100644 index a4690c7..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 
@@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 348.77 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.34 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log deleted file mode 100644 index 5226879..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1530.07 ± 0.42 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.01 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log deleted file mode 100644 index e50b6d7..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: 
GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 348.73 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.36 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index af74035..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1493.41 ± 1.54 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.90 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log deleted file mode 100644 index 7919346..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD 
Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 139.87 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.35 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index a461503..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1494.79 ± 2.85 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.91 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log deleted file mode 100644 index 34ebd8c..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, 
Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 139.47 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.36 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1.log deleted file mode 100644 index 9935294..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1529.76 ± 1.36 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.85 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log deleted file mode 100644 index 5c0b82d..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 350.95 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.35 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index a8d33d1..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1530.14 ± 1.62 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.88 ± 0.01 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log deleted file mode 100644 index fda552c..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 347.70 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.35 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log deleted file mode 100644 index 8493016..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1494.56 ± 4.36 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 56.03 ± 0.06 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log deleted file mode 100644 index 6f58f25..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ 
d16384 | 196.22 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 15.95 ± 0.00 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1.log deleted file mode 100644 index 13624db..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1135.49 ± 4.16 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.73 ± 0.02 | - -build: 2aa45ef9e (7423) diff --git a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log b/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log deleted file mode 100644 index 549cd93..0000000 --- a/benchmark/results/20-12-2025/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 294.01 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 15.16 ± 0.00 | - 
-build: 2aa45ef9e (7423) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index f8dbba3..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.30 ± 0.11 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.98 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 1e98d22..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.37 ± 0.13 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.76 ± 0.00 | - 
-build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index fe9db4c..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.63 ± 0.08 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index a0c8e97..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.58 ± 
0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.08 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 29499ed..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 80.49 ± 0.14 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index dedaf41..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 21.15 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 2223087..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.97 ± 0.09 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index a4d9675..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | 
n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 21.17 ± 0.28 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 4540fd9..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f8003c3bc25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8003c3bfeb] -/lib64/libggml-base.so.0(+0x16669) [0x7f8003c4e669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f80033b2bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f800339cd3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f80033b2ea8] -/lib64/libggml-vulkan.so.0(+0x14f76) [0x7f8003cf6f76] -/lib64/libggml-vulkan.so.0(+0x13597f) [0x7f8003e1797f] -/lib64/libggml-vulkan.so.0(+0x136411) [0x7f8003e18411] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f8003c574d3] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f800756ce70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f800756f445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f8007575aaf] 
-/lib64/libllama.so.0(llama_decode+0xe) [0x7f800757742e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x41977f] -/lib64/libc.so.6(+0x35b5) [0x7f80030835b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8003083668] -/usr/sbin/llama-bench() [0x41b595] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 8d7f3ff..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f34c7070c25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f34c7070feb] -/lib64/libggml-base.so.0(+0x16669) [0x7f34c7083669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f34c67e7bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f34c67d1d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f34c67e7ea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f34c712db68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f34c7213fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f34c708c092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f34ca9a1e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f34ca9a4445] 
-/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f34ca9aaaaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f34ca9ac42e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x41977f] -/lib64/libc.so.6(+0x35b5) [0x7f34c64b85b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f34c64b8668] -/usr/sbin/llama-bench() [0x41b595] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 1c52acf..0000000 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.20 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index eb50e64..0000000 --- 
a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 7.39 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.60 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index 8bc6dfa..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 435.72 ± 2.11 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.38 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index ec45ef3..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.93 ± 0.11 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.33 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 6771408..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 403.83 ± 1.74 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.37 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 28c5c04..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.49 ± 0.23 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.33 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index f8af5b7..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 393.19 ± 1.92 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 19.36 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 431595d..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.74 ± 0.05 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.06 ± 0.18 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 5c65164..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 393.15 ± 2.00 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.11 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index adc0015..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.55 ± 0.30 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.18 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 3da36d8..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 476.23 ± 2.21 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.36 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 8a819b6..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 86.79 ± 0.15 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.32 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index d0c28f4..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 401.13 ± 5.52 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.35 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 
3da0eab..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 85.92 ± 0.13 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.30 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 1400856..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 114.88 ± 0.15 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 10.48 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log 
deleted file mode 100644 index 0c10034..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.15 ± 0.00 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.05 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 2c91e29..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 342.11 ± 5.47 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 9.48 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log 
b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index d0eb79f..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.93 ± 0.11 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index f49590b..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 905.79 ± 2.84 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.96 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log 
b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index b2ff1c9..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.97 ± 0.19 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.58 ± 0.07 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index fcbe701..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 893.68 ± 28.19 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.20 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log 
b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0835b60..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.83 ± 0.16 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.62 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 5f26656..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 903.39 ± 1.04 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 31.04 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file 
mode 100644 index 8dc2481..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.84 ± 0.23 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.33 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index ada6b79..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 906.25 ± 1.64 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.57 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 41a8790..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 100.07 ± 0.20 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.35 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index 37c4d87..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 902.85 ± 59.94 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.16 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index d4a1f68..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 89.75 ± 0.31 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.59 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index fd05cd0..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 906.42 ± 2.57 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.18 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index d6e72bd..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 89.99 ± 0.26 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.60 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index fcd1fbe..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 499.98 ± 1.29 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 39.01 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index e9e723c..0000000 --- 
a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7fa3d16b4c25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa3d16b4feb] -/lib64/libggml-base.so.0(+0x16669) [0x7fa3d16c7669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7fa3d0e2bbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fa3d0e15d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7fa3d0e2bea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7fa3d1771b68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7fa3d1857fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fa3d16d0092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa3d4fe5e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fa3d4fe8445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fa3d4feeaaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7fa3d4ff042e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x419a10] -/lib64/libc.so.6(+0x35b5) [0x7fa3d0afc5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa3d0afc668] -/usr/sbin/llama-bench() [0x41b595] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! 
[vulkan_amdvlk] GLM-4.7-Flash-UD-Q8_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index c84fe4a..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 853.46 ± 6.70 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 40.38 ± 1.22 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 3b8a27e..0000000 --- a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 133.27 ± 0.03 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ 
d32768 | 21.17 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log deleted file mode 100644 index 16a5422..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 673.71 ± 1.42 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.35 ± 0.17 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index fe8ad78..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 748.37 ± 4.67 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.42 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index ff41820..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.11 ± 1.59 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log deleted file mode 100644 index aaf2ed8..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 706.16 ± 3.76 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.39 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index f03361f..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.61 ± 1.47 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 7a1e96f..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 710.61 ± 4.31 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.39 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4ae9022..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 161.22 ± 2.64 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.06 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log deleted file mode 100644 index 4bfad54..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 881.45 ± 3.04 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.44 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index e15c3d6..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 213.50 ± 0.70 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 710a6a1..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 735.98 ± 5.08 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.44 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8ee4f6b..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 205.29 ± 0.84 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log deleted file mode 100644 index 47e06e4..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f9947881c25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f9947881feb] -/lib64/libggml-base.so.0(+0x16669) [0x7f9947894669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f9946ff8bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f9946fe2d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f9946ff8ea8] -/lib64/libggml-vulkan.so.0(+0x16b68) 
[0x7f994793eb68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f9947a24fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f994789d092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f994b1b2e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f994b1b5445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f994b1bbaaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f994b1bd42e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x419a10] -/lib64/libc.so.6(+0x35b5) [0x7f9946cc95b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f9946cc9668] -/usr/sbin/llama-bench() [0x41b595] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 failed (exit 0) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 59b5b4b..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f4bc4c80c25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4bc4c80feb] -/lib64/libggml-base.so.0(+0x16669) [0x7f4bc4c93669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f4bc43f7bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f4bc43e1d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f4bc43f7ea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f4bc4d3db68] 
-/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f4bc4e23fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f4bc4c9c092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f4bc85b1e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f4bc85b4445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f4bc85baaaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f4bc85bc42e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x41977f] -/lib64/libc.so.6(+0x35b5) [0x7f4bc40c85b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4bc40c8668] -/usr/sbin/llama-bench() [0x41b595] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log deleted file mode 100644 index 8311fc9..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 166.51 ± 0.62 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.94 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 645c378..0000000 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 72.65 ± 0.24 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.70 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 2d8a89d..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 263.95 ± 1.57 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 
120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.02 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index a6f51a2..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 305.52 ± 0.18 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.59 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index c181684..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 260.98 ± 1.56 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.60 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 6d79881..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 324.69 ± 0.27 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.38 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 6c0fee0..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 
126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 262.30 ± 1.42 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.86 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index ca1525c..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 324.11 ± 0.41 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.45 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index 2b76326..0000000 --- 
a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 228.50 ± 1.06 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.07 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 39e5779..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.43 ± 0.38 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.76 ± 0.01 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index c5c9ddb..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 236.84 ± 1.32 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.04 ± 0.05 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index db1e7af..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 
78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 290.04 ± 0.11 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.76 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index ecc3dbc..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 140.84 ± 0.27 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 13.97 ± 0.05 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index fe06b04..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int 
dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 106.21 ± 0.16 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.21 ± 0.32 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index cd213bb..0000000 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 190.66 ± 7.11 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.41 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 4618d0c..0000000 --- 
a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 191.12 ± 0.46 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.85 ± 0.26 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log deleted file mode 100644 index bebfbdf..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 194.62 ± 1.88 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.64 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log 
b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 69580a4..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.93 ± 0.03 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.41 ± 0.37 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 3945af1..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 194.32 ± 1.84 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.80 ± 0.11 | - -build: 2405d59cb (8577) 
diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 51d1402..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.97 ± 0.14 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.63 ± 0.11 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 87dbc2a..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.89 ± 1.84 
| -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 12.96 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 18f0388..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 70.32 ± 0.08 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.22 ± 0.11 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 2a8c48b..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 198.04 ± 1.90 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.11 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 5d89766..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 70.27 ± 0.05 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.02 ± 0.31 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index e3df124..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| 
model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.46 ± 1.16 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.31 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 7c95bbf..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 48.93 ± 0.02 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.34 ± 0.41 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index b2bd010..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ 
-1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 194.00 ± 1.99 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.33 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 61b7bff..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.33 ± 0.04 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.46 ± 0.34 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file 
mode 100644 index 865311b..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 106.76 ± 0.77 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 16.49 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index fdee904..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 26.54 ± 0.02 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.57 ± 0.01 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 9130533..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 158.81 ± 2.40 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.16 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index b16d990..0000000 --- a/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | 
Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 28.25 ± 0.02 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.49 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index cfd6de4..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 474.42 ± 2.29 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.36 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index f391dea..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 189.58 ± 0.12 | -| 
qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.53 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 3b26f36..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 479.78 ± 2.72 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.37 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 820d3b1..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 180.26 ± 0.37 | -| qwen3moe 30B.A3B BF16 | 
56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.54 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index b60bde6..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 556.96 ± 2.81 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.05 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index d9f8066..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 238.10 ± 0.11 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 
| 18.26 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 6dbd18b..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 556.80 ± 4.26 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.16 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index d9525bd..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 237.85 ± 0.37 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.46 ± 0.01 | - 
-build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 8f2facd..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 503.77 ± 3.42 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.54 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index e21cfdb..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.59 ± 0.07 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.25 ± 1.21 | - -build: 2405d59cb (8577) 
diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index e018831..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 482.99 ± 0.86 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.49 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index dfd3f50..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 175.78 ± 0.14 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.94 ± 0.00 | - -build: 
2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 05e1c62..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 171.39 ± 0.81 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 10.48 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 05e07dc..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 73.62 ± 0.08 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | 
tg32 @ d32768 | 8.49 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 7bc1638..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 369.06 ± 2.58 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.38 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 6636734..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 140.81 ± 0.52 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | 
Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.24 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index 8ac5001..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 994.33 ± 11.86 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 55.87 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 1d4eadc..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.15 ± 0.11 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 
30.88 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 81f42e5..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 994.22 ± 10.06 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 55.84 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index fc65e7c..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 208.93 ± 0.21 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ 
d32768 | 30.90 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 8d03802..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1009.37 ± 9.64 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 53.13 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index e60bbfd..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 285.49 ± 0.18 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.53 ± 0.02 | - 
-build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 9ab5751..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1012.69 ± 9.02 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 54.94 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1d13bc6..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 285.25 ± 0.16 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.50 ± 
0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index ae4042e..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1023.24 ± 11.08 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.92 ± 0.17 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index a37013f..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.37 ± 0.03 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 
31.69 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 7aedfaa..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1012.11 ± 7.91 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.92 ± 0.21 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4a658bc..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 205.56 ± 0.23 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B 
| ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.62 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index 9239487..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 828.53 ± 4.66 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 63.31 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index a200dea..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | 
pp2048 @ d32768 | 110.50 ± 0.14 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 25.87 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 29ec4d4..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1145.66 ± 9.68 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 68.15 ± 0.11 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 3dfccf2..0000000 --- a/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 
30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 193.03 ± 0.86 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.02 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log deleted file mode 100644 index e6d9faa..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1149.31 ± 13.28 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 67.72 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 31bdd1d..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | 
ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 212.38 ± 0.33 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.49 ± 0.23 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 0031201..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1149.83 ± 7.05 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.05 ± 0.68 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1b4492c..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 
17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 212.45 ± 0.21 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.52 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log deleted file mode 100644 index 9642b43..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1170.01 ± 7.53 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 65.12 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index ad3e381..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 
30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 291.28 ± 0.14 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.65 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index a288f4e..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1167.76 ± 7.70 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 64.97 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index b190589..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B 
Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 290.12 ± 2.48 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.65 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log deleted file mode 100644 index 12481a0..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1178.27 ± 10.86 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.46 ± 0.62 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 95d24a2..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: 
| -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.14 ± 0.10 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.83 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index ed6c60e..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1172.23 ± 12.92 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.65 ± 0.62 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8b694cc..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: 
| -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.05 ± 0.22 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.76 ± 0.25 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log deleted file mode 100644 index b1b4b06..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 882.98 ± 3.84 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.84 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 0e42549..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | 
ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.65 ± 0.08 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.03 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log deleted file mode 100644 index 3b18735..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1290.50 ± 7.83 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 83.79 ± 0.18 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 253d842..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int 
dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 194.26 ± 0.86 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.04 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log deleted file mode 100644 index eaf9c62..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 613.58 ± 2.84 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.81 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index efca4d7..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 
126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 447.94 ± 2.75 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.06 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index ba7778f..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 583.48 ± 105.06 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.85 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4e99c3f..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 
(0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 439.93 ± 32.65 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.09 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 029c2b7..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 627.46 ± 3.12 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.02 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index be93303..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 
(0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 527.78 ± 1.05 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.22 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index b272219..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 623.64 ± 17.23 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.16 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9521439..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - 
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 512.14 ± 3.00 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.33 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index 1ce170b..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 622.47 ± 8.34 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.93 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 1e6ca64..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm 
devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 443.00 ± 30.64 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.97 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 1407abc..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 600.08 ± 13.59 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.99 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 57f366f..0000000 --- 
a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 457.86 ± 1.53 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.08 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index afbab06..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 396.36 ± 1.71 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 30.90 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log 
b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index a881bd7..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 208.44 ± 0.64 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 26.08 ± 0.05 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index 728a4b6..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 500.88 ± 3.30 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 31.74 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 8b67617..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 420.42 ± 0.15 | -| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.05 ± 0.05 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index 2791109..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 629.03 ± 5.19 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 
31.37 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index ffe0750..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 439.65 ± 0.69 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.25 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index ca5a35f..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 648.58 ± 5.44 | -| 
qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 30.86 ± 0.77 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index dbd302b..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 457.60 ± 1.26 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.22 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 4ce1624..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 
80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 634.72 ± 6.93 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.90 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 07b6bfd..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 511.83 ± 7.51 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.45 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index cd84abc..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | 
---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 632.78 ± 59.05 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 30.53 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 70d9321..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 507.17 ± 2.44 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.44 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 713a371..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | 
backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 621.13 ± 82.64 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 31.52 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index aa85e0e..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 467.56 ± 0.55 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.26 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index ee04293..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - 
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 661.07 ± 5.80 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 31.54 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index ca2c10d..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 432.86 ± 0.82 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.20 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index c349ffd..0000000 --- 
a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 436.50 ± 7.59 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 34.72 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index e177acc..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 218.02 ± 0.85 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.50 ± 0.07 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index f7c91c0..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 587.86 ± 37.36 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 36.28 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 25f1422..0000000 --- a/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 453.76 ± 
0.75 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 31.67 ± 0.06 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index c769c07..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 313.18 ± 2.32 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.62 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6cacb57..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB 
| 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 243.74 ± 1.45 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.51 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 83fa89a..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 311.68 ± 1.84 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 18.77 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 995277c..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | 
-| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 273.72 ± 1.22 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.91 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 9330711..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 306.06 ± 2.55 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.25 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4066e47..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | 
---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 270.84 ± 0.74 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.98 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index 0bb4c16..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 314.27 ± 4.13 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.66 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 482822d..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | 
n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 235.12 ± 5.24 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.36 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 3c70313..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 310.73 ± 1.65 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.61 ± 0.11 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 22ef68a..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 
0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 241.56 ± 5.74 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.35 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index 2aa7628..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 183.05 ± 1.84 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.31 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 9159adc..0000000 --- 
a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.85 ± 0.15 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 18.76 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index ebcc2cb..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 239.56 ± 7.45 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.68 ± 0.38 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log 
b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index ba5992f..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 203.34 ± 0.47 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 20.09 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index 88d3daf..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 508.43 ± 3.18 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.65 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8693fc9..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 382.51 ± 1.54 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.63 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 955738e..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 544.11 ± 3.06 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 21.40 ± 0.01 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 19940ae..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 407.19 ± 1.96 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.59 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 23b8db1..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 550.28 ± 3.67 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.23 ± 0.18 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 221ccc6..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 405.13 ± 1.85 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.72 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 3151c98..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 541.57 ± 11.33 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.69 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 25444a3..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 441.64 ± 9.63 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.18 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 532dfcd..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 509.57 ± 8.20 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.78 ± 0.00 | - -build: 2405d59cb (8577) 
diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 24b9961..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 379.36 ± 1.33 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.20 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index ec9c2e2..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.56 ± 0.40 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 
99 | 1 | 0 | tg128 | 11.56 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index af1901a..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 97.32 ± 0.17 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.95 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index d914309..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 309.96 ± 4.20 | -| qwen35moe 
35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 10.79 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index a17d6c3..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 258.85 ± 0.77 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.39 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index 91bf9ac..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1071.38 ± 11.20 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 
34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.78 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 1c89299..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1093.96 ± 6.37 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.95 ± 0.30 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 18626cd..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 661.40 ± 0.76 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ 
d32768 | 40.13 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 77bc2b3..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1079.44 ± 6.76 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 46.46 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 28cdd02..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 762.29 ± 2.68 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.46 ± 0.02 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index e8d6552..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1082.35 ± 6.79 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 46.48 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index e352549..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 777.89 ± 1.08 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.24 ± 0.03 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index f34ef08..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1092.86 ± 9.42 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.16 ± 0.30 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index ca64073..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 655.39 ± 2.00 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.90 ± 0.03 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 90d466f..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1113.86 ± 6.42 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.10 ± 0.31 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 558b9cf..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 655.89 ± 1.47 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.98 ± 0.03 | - -build: 2405d59cb 
(8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index a947901..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 661.63 ± 3.14 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 58.16 ± 0.08 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index dcdc814..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 288.86 ± 0.53 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ 
d32768 | 44.24 ± 0.09 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index bc44011..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1013.40 ± 39.22 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 59.13 ± 0.07 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 2945779..0000000 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 673.55 ± 0.64 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan 
| 99 | 1 | 0 | tg32 @ d32768 | 48.93 ± 0.13 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log deleted file mode 100644 index 5893093..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 854.20 ± 6.85 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index a017aaa..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 392.45 ± 16.97 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index f21710a..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 877.69 ± 1.71 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 526bc80..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 387.73 ± 18.58 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log 
b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 797411c..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 869.53 ± 1.47 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index d664914..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 260.25 ± 4.30 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.60 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 
e90965b..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 870.35 ± 1.54 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 829f89e..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 263.85 ± 5.85 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.61 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index 116b328..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 892.78 ± 1.04 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 77f2bd0..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 441.92 ± 38.71 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 2423cc9..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 895.09 ± 0.81 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1f6940d..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 449.40 ± 28.98 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index a32ca20..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 122.72 ± 0.06 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.31 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index ba09991..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 107.34 ± 0.03 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.32 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index c138142..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 513.78 ± 16.10 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.07 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 785012f..0000000 --- a/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 258.63 ± 1.57 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.53 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index f1ccf93..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 324.28 ± 1.23 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.97 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index b0cb077..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.64 ± 2.06 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.70 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 8531f51..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 455.07 ± 0.55 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.97 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 714f6c0..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.26 ± 1.59 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.70 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 47bc0c4..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 426.08 ± 0.81 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.83 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index ca28d62..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.69 ± 3.55 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.68 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index f180c49..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 440.33 ± 0.38 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.96 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 5dc5e20..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.85 ± 0.64 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.69 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index ea47115..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.09 ± 1.03 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.98 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index ceac7ca..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.83 ± 1.03 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.68 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 2c5cf1e..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 452.21 ± 1.03 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.98 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index e630a45..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 181.81 ± 3.26 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.70 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index f8267e7..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f25ac3cbc25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f25ac3cbfeb] -/lib64/libggml-base.so.0(+0x16669) [0x7f25ac3de669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f25abb42bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f25abb2cd3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f25abb42ea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f25ac488b68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f25ac56efd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f25ac3e7092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f25afcfce70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f25afcff445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f25afd05aaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f25afd0742e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x419a10] -/lib64/libc.so.6(+0x35b5) [0x7f25ab8135b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f25ab813668] -/usr/sbin/llama-bench() [0x41b595] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! 
[vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 17cbfe3..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f5b5347fc25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5b5347ffeb] -/lib64/libggml-base.so.0(+0x16669) [0x7f5b53492669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f5b52bf6bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f5b52be0d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f5b52bf6ea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f5b5353cb68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f5b53622fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f5b5349b092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f5b56db0e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f5b56db3445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f5b56db9aaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f5b56dbb42e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x41977f] -/lib64/libc.so.6(+0x35b5) [0x7f5b528c75b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5b528c7668] -/usr/sbin/llama-bench() [0x41b595] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! 
[vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index be2d8b8..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 91.65 ± 0.40 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.98 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 8fe70f2..0000000 --- a/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 81.52 ± 0.33 | -| gemma3 27B BF16 | 50.31 GiB | 
27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log deleted file mode 100644 index 92de1c6..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2700.94 ± 1.79 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.69 ± 0.66 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index ab19336..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1564.27 ± 18.55 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.80 ± 0.48 | - -build: 2405d59cb (8577) diff --git 
a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 76309ba..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2691.00 ± 4.60 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.86 ± 0.64 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 85d1206..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1563.05 ± 19.77 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.95 ± 0.08 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log 
b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log deleted file mode 100644 index 7fd54a5..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2719.77 ± 6.47 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 73.32 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index a88f066..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1687.79 ± 33.87 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.09 ± 0.16 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index ba6263c..0000000 --- 
a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2713.06 ± 9.07 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.85 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index ea96dc2..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1681.29 ± 17.71 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.11 ± 0.29 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log deleted file mode 100644 index f37ea3e..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 
+0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2666.49 ± 3.29 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.31 ± 0.89 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 769b2bf..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1530.18 ± 18.45 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.72 ± 0.36 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 0e4d278..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: 
Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2674.53 ± 5.86 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.93 ± 0.64 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index d6379ae..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1534.58 ± 20.23 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.65 ± 0.44 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log deleted file mode 100644 index 9325e89..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 
32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 647.40 ± 0.53 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 79.07 ± 0.08 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index e54640a..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 455.63 ± 1.03 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 54.86 ± 0.17 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log deleted file mode 100644 index ed9c6f1..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| 
model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 2479.97 ± 41.40 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 87.24 ± 0.13 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index c008393..0000000 --- a/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 1759.67 ± 6.72 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 64.91 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log deleted file mode 100644 index 83045a0..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 631.59 ± 4.27 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.91 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index dd99083..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 303.63 ± 0.57 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.07 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 8ff8feb..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 610.41 ± 53.09 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.34 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index d0b590a..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 302.82 ± 1.30 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.12 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 492fecb..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- 
| --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 648.44 ± 6.33 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 49.85 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index c6c534c..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 442.64 ± 0.84 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.69 ± 0.34 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 8d1b5bf..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 639.43 ± 31.93 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.99 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index c1bacb6..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 438.75 ± 1.06 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.75 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log deleted file mode 100644 index d70f3d4..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: 
| -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 596.69 ± 97.42 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.38 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 9670ec2..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 308.13 ± 1.66 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.43 ± 8.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 0857b4a..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 641.07 ± 11.17 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.35 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index a189399..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 314.50 ± 0.28 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.04 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index e8675e6..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 576.81 ± 2.43 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 51.18 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 315ed48..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 213.74 ± 0.68 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.52 ± 0.07 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log deleted file mode 100644 index c0ad41d..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | 
t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 633.21 ± 13.06 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.15 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index fae75b7..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 287.49 ± 1.21 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 42.67 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log deleted file mode 100644 index 5e219c6..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | 
---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1638.53 ± 13.50 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.67 ± 0.07 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index ca3a5f0..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 488.89 ± 0.54 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.91 ± 0.03 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index 6094846..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | 
pp512 | 1631.29 ± 15.38 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.62 ± 0.09 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index f2a466a..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 492.09 ± 1.86 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.93 ± 0.04 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log deleted file mode 100644 index a7cf323..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1691.01 ± 16.68 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.07 ± 0.01 | - -build: 2405d59cb 
(8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 45bb062..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 730.11 ± 1.16 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.02 ± 0.07 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index a3bdd9d..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1690.30 ± 13.53 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.00 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log 
b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index edcc380..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 730.49 ± 1.20 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.89 ± 0.05 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log deleted file mode 100644 index 932a36a..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1668.50 ± 13.61 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.68 ± 0.10 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 
65c4caa..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 507.77 ± 2.81 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.46 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log deleted file mode 100644 index 50262df..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1642.70 ± 14.12 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.75 ± 0.10 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index f425c83..0000000 --- 
a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 507.84 ± 1.97 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.32 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log deleted file mode 100644 index fa14b74..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1303.99 ± 6.84 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 73.68 ± 0.08 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 99d5ac7..0000000 --- 
a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 364.73 ± 0.33 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.99 ± 0.21 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log deleted file mode 100644 index 917f775..0000000 --- a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1577.96 ± 12.64 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 78.94 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 97ddca5..0000000 --- 
a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 541.12 ± 0.20 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 60.77 ± 0.08 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log deleted file mode 100644 index 99ce5e7..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1543.27 ± 2.63 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.61 ± 0.14 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log deleted file mode 100644 index 0dc8ee6..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 
MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.68 ± 0.88 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log deleted file mode 100644 index dd483cb..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1539.48 ± 5.61 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.57 ± 0.15 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log deleted file mode 100644 index 5a66ad0..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | 
t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.31 ± 1.47 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log deleted file mode 100644 index 5103c84..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1528.37 ± 6.40 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.14 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index e0b84ff..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 
| 0 | pp2048 @ d32768 | 196.35 ± 2.62 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.97 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index acaa1cf..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1521.12 ± 5.74 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.22 ± 0.02 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 15d1b59..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 196.03 ± 1.28 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.97 ± 0.00 | - -build: 2405d59cb (8577) diff 
--git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log deleted file mode 100644 index 6e1ba6a..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1507.68 ± 1.23 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.41 ± 0.14 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index c083ed2..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.97 ± 1.96 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.62 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log deleted 
file mode 100644 index c51b8f4..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1508.58 ± 2.01 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.44 ± 0.15 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log deleted file mode 100644 index 25dcbb6..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.54 ± 0.91 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.62 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log deleted file mode 100644 index a1c3881..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ 
-ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 324.82 ± 0.45 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.43 ± 0.14 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 0c19fe3..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 144.29 ± 1.09 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log deleted file mode 100644 index f0e5a2f..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | 
fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1313.97 ± 1.29 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.59 ± 0.05 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 50c5ec7..0000000 --- a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 208.18 ± 2.24 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.52 ± 0.01 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/system_info.json b/benchmark/results/29-03-2026/system_info.json deleted file mode 100644 index 2efeb7c..0000000 --- a/benchmark/results/29-03-2026/system_info.json +++ /dev/null @@ -1 +0,0 @@ -{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", "timestamp": "29 Mar 2026"} diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log 
b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 486713e..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 72.06 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 795fa30..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.76 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.08 ± 0.01 | - -build: 7957de9dc (8645) diff --git 
a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log deleted file mode 100644 index 93d71b6..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 77.83 ± 0.10 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.95 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 716d737..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.10 ± 0.04 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.06 ± 0.00 | - -build: f53577432 
(8942) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index f2036b6..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.40 ± 0.10 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.76 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index d77eb38..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.57 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.09 ± 0.01 | - -build: 
3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log deleted file mode 100644 index 2a7f425..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.06 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.95 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index b6b5178..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.34 ± 0.14 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 
| tg32 @ d32768 | 2.06 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log deleted file mode 100644 index 5a74d0f..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 8.45 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 1.43 ± 0.02 | - -build: ab6120cde (8997) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 7e0aed4..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f656fe92465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f656fe9282b] -/lib64/libggml-base.so.0(+0x16ed9) [0x7f656fea4ed9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f656f608bfc] 
-/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f656f5f2d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f656f608ea8] -/lib64/libggml-vulkan.so.0(+0x157c0) [0x7f656ff607c0] -/lib64/libggml-vulkan.so.0(+0x13fa8f) [0x7f657008aa8f] -/lib64/libggml-vulkan.so.0(+0x140521) [0x7f657008b521] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f656feae553] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f6573ac9530] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f6573acbb05] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f6573ad226f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f6573ad3bee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416c49] -/lib64/libc.so.6(+0x35b5) [0x7f656f2d95b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f656f2d9668] -/usr/sbin/llama-bench() [0x4189d5] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! 
[vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 1ff0669..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f178fa50465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f178fa5082b] -/lib64/libggml-base.so.0(+0x16ed9) [0x7f178fa62ed9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f178f1c6bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f178f1b0d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f178f1c6ea8] -/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f178fb203b2] -/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f178fc105d0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f178fa6c112] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1793687530] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f1793689b05] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f179369026f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f1793691bee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416c49] -/lib64/libc.so.6(+0x35b5) [0x7f178ee975b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f178ee97668] -/usr/sbin/llama-bench() [0x4189d5] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost 
-✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log deleted file mode 100644 index 2d1f68b..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log +++ /dev/null @@ -1,23 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f69acb1c465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f69acb1c83b] -/lib64/libggml-base.so.0(+0x16f19) [0x7f69acb2ef19] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f69ac8bfbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f69ac8a9d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f69ac8bfea8] -/lib64/libggml-vulkan.so.0(+0x1728d) [0x7f69acbf128d] -/lib64/libggml-vulkan.so.0(+0x10a410) [0x7f69acce4410] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f69acb38192] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f69b07a6c70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f69b07a9255] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f69b07af98f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f69b07b132e] -/usr/sbin/llama-bench() [0x40663b] -/usr/sbin/llama-bench() [0x4038b9] -/lib64/libc.so.6(+0x35b5) [0x7f69ac5905b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f69ac590668] -/usr/sbin/llama-bench() [0x404e65] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): 
vk::Queue::submit: ErrorDeviceLost -✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index b4ac940..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 46.91 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index cc9c080..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 7.32 ± 0.01 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.59 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx65536.log deleted file mode 100644 index 9523902..0000000 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx65536.log +++ /dev/null @@ -1,25 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -radv/amdgpu: The CS has been cancelled because the context is lost. This context is innocent. -/lib64/libggml-base.so.0(+0x4465) [0x7fd3fbd2c465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fd3fbd2c83b] -/lib64/libggml-base.so.0(+0x16f19) [0x7fd3fbd3ef19] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7fd3fbacfbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fd3fbab9d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7fd3fbacfea8] -/lib64/libggml-vulkan.so.0(+0x1569b) [0x7fd3fbdff69b] -/lib64/libggml-vulkan.so.0(+0x14505a) [0x7fd3fbf2f05a] -/lib64/libggml-vulkan.so.0(+0x145c31) [0x7fd3fbf2fc31] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fd3fbd485d3] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fd3ff9b6c70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fd3ff9b9255] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fd3ff9bf98f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7fd3ff9c132e] 
-/usr/sbin/llama-bench() [0x40663b] -/usr/sbin/llama-bench() [0x403a5b] -/lib64/libc.so.6(+0x35b5) [0x7fd3fb7a05b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fd3fb7a0668] -/usr/sbin/llama-bench() [0x404e65] -terminate called after throwing an instance of 'vk::DeviceLostError' - what(): vk::Queue::submit: ErrorDeviceLost -✖ ! [vulkan_radv] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index e284659..0000000 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 403.45 ± 2.11 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index d97ca66..0000000 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.65 ± 0.31 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.25 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 6d03d4a..0000000 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 46.54 ± 0.17 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.91 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1.log deleted file mode 100644 index d1f71ff..0000000 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,2 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log 
b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index ed83496..0000000 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.91 ± 0.20 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.84 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 503a469..0000000 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 46.47 ± 0.09 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.63 ± 0.19 | - -build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1.log 
b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1.log index b419d8c..02e945a 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 403.20 ± 2.04 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 20.43 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 422.28 ± 1.78 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 23.17 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log index 508de56..630aead 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.63 ± 0.21 | -| deepseek2 30B.A3B BF16 
| 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.31 ± 0.02 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.33 ± 0.25 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.56 ± 0.01 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log index cf9cc2e..eb8984d 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 46.45 ± 0.01 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.98 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 48.38 ± 0.18 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 12.90 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log deleted file mode 100644 index d1f71ff..0000000 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log +++ /dev/null @@ -1,2 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total 
VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log index 9250782..691ac41 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 392.81 ± 1.96 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 18.97 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 407.51 ± 1.36 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 22.00 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 4662794..d3ed5fa 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave 
Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.55 ± 0.17 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.89 ± 0.01 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 97.18 ± 0.20 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.38 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log index 563fcbe..2db9935 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 50.95 ± 0.07 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.15 ± 0.09 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 51.90 ± 0.19 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 
29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 12.82 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log index 5735731..09783e2 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 400.57 ± 4.10 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 19.39 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 424.65 ± 3.93 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 23.30 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index ef19e5d..0ea8bb1 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | 
---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 86.01 ± 0.04 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.86 ± 0.03 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 90.71 ± 0.38 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.55 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log index 8f535c1..f2abb61 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 45.39 ± 0.36 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.22 ± 0.13 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 47.61 ± 0.13 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 12.92 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log 
b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index e2f4794..70128c6 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 114.94 ± 0.15 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 10.48 ± 0.01 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 115.47 ± 0.14 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 10.78 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 2dc96f2..708dcf2 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics 
(AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.14 ± 0.00 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.04 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.42 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.40 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log index 3dd0992..d1ac5ad 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -1,24 +1,24 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7fa5c507b465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa5c507b83b] -/lib64/libggml-base.so.0(+0x16f19) [0x7fa5c508df19] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7fa5c4e1ebfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fa5c4e08d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7fa5c4e1eea8] -/lib64/libggml-vulkan.so.0(+0x1569b) [0x7fa5c514e69b] -/lib64/libggml-vulkan.so.0(+0x14505a) [0x7fa5c527e05a] -/lib64/libggml-vulkan.so.0(+0x145c31) [0x7fa5c527ec31] 
-/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fa5c50975d3] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa5c8d05c70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fa5c8d08255] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fa5c8d0e98f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7fa5c8d1032e] -/usr/sbin/llama-bench() [0x40663b] -/usr/sbin/llama-bench() [0x403a5b] -/lib64/libc.so.6(+0x35b5) [0x7fa5c4aef5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa5c4aef668] -/usr/sbin/llama-bench() [0x404e65] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f4479108465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f447910883b] +/lib64/libggml-base.so.0(+0x16f59) [0x7f447911af59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f4478eabbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f4478e95d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f4478eabea8] +/lib64/libggml-vulkan.so.0(+0x1381f) [0x7f44791d981f] +/lib64/libggml-vulkan.so.0(+0x139c0a) [0x7f44792ffc0a] +/lib64/libggml-vulkan.so.0(+0x13a7e1) [0x7f44793007e1] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f4479124613] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f447cade9d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f447cae15b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7f447cae7f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f447cae9aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403c06] +/lib64/libc.so.6(+0x35b5) [0x7f4478b7c5b5] 
+/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4478b7c668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! [vulkan_amdvlk] GLM-4.7-Flash-BF16-00001-of-00002__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log index a0c45c2..7002593 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 326.36 ± 4.62 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 9.50 ± 0.02 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 361.24 ± 1.76 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 9.46 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 8e29b1a..e653558 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ 
b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.00 ± 0.06 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.02 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 109.96 ± 0.16 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.96 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log index 094219e..06a0f51 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | 
ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 61.52 ± 0.01 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 6.57 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 65.21 ± 0.03 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 6.90 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 0252058..0000000 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 1058.10 ± 2.19 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.18 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 6a9e2f9..0000000 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: 
no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.21 ± 1.08 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.62 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 2445d7c..0000000 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 47.63 ± 0.04 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.98 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log deleted file mode 100644 index 79c3029..0000000 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | 
t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 889.16 ± 36.98 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.61 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index b24dbc6..0000000 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.01 ± 1.51 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.38 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 0d48067..0000000 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 47.62 ± 0.15 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.87 ± 0.00 | - -build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1.log index e64c344..39e42ba 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 905.99 ± 2.05 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.65 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 913.36 ± 47.37 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 35.50 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log index 7f43dd2..451f5ba 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa 
| mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.59 ± 0.16 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.36 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 95.82 ± 0.14 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.96 ± 0.02 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log index b6a4eed..9b2cbce 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 47.51 ± 0.05 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.86 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 49.39 ± 0.15 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.86 ± 0.20 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log 
b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log index 45a93b4..27f36de 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 903.01 ± 2.32 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 30.77 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 929.42 ± 1.08 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.78 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index 84cc9c5..db73952 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 
30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.70 ± 0.23 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.93 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 102.84 ± 0.07 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.41 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log index 8344413..395fb88 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 51.79 ± 0.10 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.36 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 53.18 ± 0.20 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.67 ± 0.10 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log index a2cc15a..facd1b7 
100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 888.70 ± 38.05 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.81 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 931.06 ± 6.95 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 35.52 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index ac18d67..87c0228 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 89.53 ± 0.09 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.47 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 
B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.94 ± 0.61 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.00 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log index 0d230ee..7d1f664 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 46.43 ± 0.14 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.43 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 48.90 ± 0.07 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.99 ± 0.01 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 57fea0d..28b8c9b 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 
32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 499.74 ± 1.49 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 39.02 ± 0.02 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 543.44 ± 1.10 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 39.66 ± 0.02 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 7615176..5a7ec4f 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,24 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f03039c2465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f03039c282b] -/lib64/libggml-base.so.0(+0x16ed9) [0x7f03039d4ed9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f0303138bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f0303122d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f0303138ea8] -/lib64/libggml-vulkan.so.0(+0x157c0) [0x7f0303a907c0] -/lib64/libggml-vulkan.so.0(+0x13f96a) [0x7f0303bba96a] -/lib64/libggml-vulkan.so.0(+0x140521) [0x7f0303bbb521] 
-/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f03039de553] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f03075f9530] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f03075fbb05] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f030760226f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f0307603bee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416ec6] -/lib64/libc.so.6(+0x35b5) [0x7f0302e095b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0302e09668] -/usr/sbin/llama-bench() [0x4189d5] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fecbc5ac465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fecbc5ac83b] +/lib64/libggml-base.so.0(+0x16f59) [0x7fecbc5bef59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fecbc34fbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fecbc339d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fecbc34fea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7fecbc67f411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7fecbc768fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fecbc5c81d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fecbff829d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fecbff855b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7fecbff8bf95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fecbff8daae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403c06] +/lib64/libc.so.6(+0x35b5) [0x7fecbc0205b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fecbc020668] +/usr/sbin/llama-bench() 
[0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! [vulkan_amdvlk] GLM-4.7-Flash-UD-Q8_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log index 8abe7d5..fec9079 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7fb35126c465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb35126c83b] -/lib64/libggml-base.so.0(+0x16f19) [0x7fb35127ef19] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7fb35100fbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fb350ff9d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7fb35100fea8] -/lib64/libggml-vulkan.so.0(+0x1728d) [0x7fb35134128d] -/lib64/libggml-vulkan.so.0(+0x10a410) [0x7fb351434410] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fb351288192] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fb354ef6c70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fb354ef9255] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fb354eff98f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7fb354f0132e] -/usr/sbin/llama-bench() [0x40663b] -/usr/sbin/llama-bench() [0x403a5b] -/lib64/libc.so.6(+0x35b5) [0x7fb350ce05b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb350ce0668] -/usr/sbin/llama-bench() [0x404e65] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics 
(AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fcec5e44465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fcec5e4483b] +/lib64/libggml-base.so.0(+0x16f59) [0x7fcec5e56f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fcec5be7bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fcec5bd1d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fcec5be7ea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7fcec5f17411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7fcec6000fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fcec5e601d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fcec981a9d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fcec981d5b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7fcec9823f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fcec9825aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403b2b] +/lib64/libc.so.6(+0x35b5) [0x7fcec58b85b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fcec58b8668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! 
[vulkan_amdvlk] GLM-4.7-Flash-UD-Q8_K_XL__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log index fd4bd04..e1ba2c3 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 875.80 ± 2.04 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 40.19 ± 1.39 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 1092.07 ± 1.21 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 41.17 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index df14c95..52b137d 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | 
matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 134.10 ± 0.07 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.18 ± 0.03 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 136.78 ± 0.02 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 22.55 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log index 7257eea..4f1d641 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 72.80 ± 0.00 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | 
Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 14.39 ± 0.01 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 74.06 ± 0.01 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 15.70 ± 0.01 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 6151c79..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 356.93 ± 1.86 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.97 ± 0.13 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 73cec88..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 65.86 ± 0.58 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.10 ± 0.04 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 20d42cf..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -main: error: failed to load model '/home/kyuz0/models/mini-max-m2.7/UD-Q3_K_S/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003.gguf' -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -✖ ! 
[rocm-7_2_2-pr21344] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log deleted file mode 100644 index e2db156..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 236.39 ± 1.24 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.64 ± 0.01 | - -build: f53577432 (8942) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 52c1dcd..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 64.53 ± 0.53 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ 
d32768 | 6.04 ± 0.17 | - -build: f53577432 (8942) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 9680e41..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -main: error: failed to load model '/home/kyuz0/models/mini-max-m2.7/UD-Q3_K_S/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003.gguf' -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -✖ ! [rocm-7_2_2] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1.log index b78725e..e896482 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 236.96 ± 1.25 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.81 ± 0.01 | +| minimax-m2 230B.A10B Q3_K 
- Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 243.25 ± 1.32 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 23.11 ± 0.01 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log index 7ffced2..688571b 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 62.13 ± 0.56 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.15 ± 0.00 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 142.92 ± 0.05 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.19 ± 0.05 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log index 322f9e6..cafc5e6 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices 
(Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 236.56 ± 1.44 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 21.55 ± 0.00 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 238.97 ± 1.17 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 21.98 ± 0.02 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index ffb9df3..191d49b 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 77.34 ± 1.21 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 
2048 | 1 | 0 | tg32 @ d32768 | 6.54 ± 0.09 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 119.83 ± 0.18 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.73 ± 0.18 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx65536.log index 05901f6..97dbdbc 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx65536.log @@ -1,3 +1,3 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -✖ ! [rocm6_4_4] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +✖ [rocm6_4_4] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 137) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log index 19ff545..7a85604 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small 
| 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 225.65 ± 0.79 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 21.44 ± 0.01 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 235.98 ± 0.63 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.13 ± 0.03 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 1c157f4..4441b35 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 64.10 ± 0.50 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.35 ± 0.28 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 156.27 ± 0.25 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.45 ± 0.03 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log index e820993..ba92219 
100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 202.08 ± 0.31 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 24.94 ± 0.01 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 205.56 ± 0.83 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 25.91 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index d513da2..1440940 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 
1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 31.48 ± 0.09 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.27 ± 0.01 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 30.23 ± 0.05 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.74 ± 0.01 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log index 62960ea..3fade02 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log @@ -1,3 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -✖ ! 
[vulkan_amdvlk] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 13.90 ± 0.02 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 6.67 ± 0.05 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log index b06e345..3c8a8ed 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 231.25 ± 0.79 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 30.70 ± 0.02 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | 
pp512 | 242.43 ± 0.92 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 31.08 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log index cecf7a1..e941ced 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 32.00 ± 0.03 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.47 ± 0.04 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.86 ± 0.21 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.65 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log index 302dcdd..ebfb237 100644 --- 
a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log @@ -1,3 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -✖ ! [vulkan_radv] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 13.79 ± 0.08 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 8.84 ± 0.09 | + +build: 0253fb21f (9187) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log deleted file mode 100644 index 6caf1c2..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 245.87 ± 1.58 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 
B | ROCm | 99 | 1 | 0 | tg128 | 22.72 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index bed6c9f..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 81.20 ± 1.25 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.69 ± 0.12 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log deleted file mode 100644 index 4f7dba8..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 238.13 ± 1.15 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | 
tg128 | 22.54 ± 0.27 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 24d6e3f..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 61.41 ± 0.44 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.64 ± 0.15 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log deleted file mode 100644 index 192b97a..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 203.65 ± 1.00 | -| minimax-m2 230B.A10B Q3_K - Medium | 
94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 24.07 ± 0.02 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 621b928..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 31.64 ± 0.19 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.06 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log deleted file mode 100644 index 366fe9b..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 
230.72 ± 8.67 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 29.48 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 9c5cc93..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 32.06 ± 0.16 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.12 ± 0.02 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index d1d295f..0000000 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 745.50 ± 9.08 | -| 
mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.42 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 5c893b9..0000000 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.19 ± 1.96 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log deleted file mode 100644 index d702654..0000000 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 732.09 ± 2.49 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.42 ± 0.00 | - -build: f53577432 (8942) 
diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 2bb5fa9..0000000 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.01 ± 2.08 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.07 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 9bdaeeb..0000000 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 80.08 ± 2.35 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.11 ± 0.00 | - -build: ab6120cde (8997) diff --git 
a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1.log similarity index 64% rename from benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1.log index d804f02..93b98ee 100644 --- a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 792.78 ± 1.08 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 801.06 ± 9.05 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | -build: 9c142e3a2 (7670) +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx32768.log similarity index 79% rename from benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx32768.log index edb719b..a35b9dc 100644 --- a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log +++ 
b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.30 ± 0.21 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.46 ± 1.14 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.12 ± 0.00 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx65536.log similarity index 79% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx65536.log index c268707..90b0de8 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | 
backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 75.02 ± 1.56 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.11 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 209.94 ± 1.05 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.14 ± 0.00 | -build: 7957de9dc (8645) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log index 7303ffc..b92fa3f 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 716.84 ± 2.31 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.39 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 762.49 ± 6.93 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log 
b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log index 644eac3..2a17883 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.12 ± 0.76 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 318.28 ± 0.86 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log index b1347cd..930d54c 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | 
mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 71.24 ± 0.32 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.09 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 188.86 ± 0.24 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.14 ± 0.00 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log index 3609ee1..f2831cf 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 726.92 ± 2.95 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.43 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 808.74 ± 14.71 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.51 ± 0.00 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log index 37b0832..722e29e 
100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 162.40 ± 0.45 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.07 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 395.07 ± 0.76 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.12 ± 0.00 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log index 3162148..8940c51 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 73.23 ± 0.94 | -| 
mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.11 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 239.54 ± 0.33 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.14 ± 0.00 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log index 43d4db9..1b8385d 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f143de79465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f143de7982b] -/lib64/libggml-base.so.0(+0x16ed9) [0x7f143de8bed9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f143d5efbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f143d5d9d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f143d5efea8] -/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f143df493b2] -/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f143e0395d0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f143de95112] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1441ab0530] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f1441ab2b05] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f1441ab926f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f1441ababee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416ec6] -/lib64/libc.so.6(+0x35b5) [0x7f143d2c05b5] 
-/lib64/libc.so.6(__libc_start_main+0x88) [0x7f143d2c0668] -/usr/sbin/llama-bench() [0x4189d5] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f5169267465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f516926783b] +/lib64/libggml-base.so.0(+0x16f59) [0x7f5169279f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f516900abfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f5168ff4d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f516900aea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7f516933a411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7f5169423fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f51692831d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f516cc3d9d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f516cc405b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7f516cc46f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f516cc48aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403c06] +/lib64/libc.so.6(+0x35b5) [0x7f5168cdb5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5168cdb668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost -✖ ! 
[vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 failed (exit 0) +✖ [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 failed (exit 134) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log index 3fad042..978d981 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f4a99ca1465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4a99ca182b] -/lib64/libggml-base.so.0(+0x16ed9) [0x7f4a99cb3ed9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f4a99417bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f4a99401d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f4a99417ea8] -/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f4a99d713b2] -/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f4a99e615d0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f4a99cbd112] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f4a9d8d8530] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f4a9d8dab05] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f4a9d8e126f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f4a9d8e2bee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416c49] -/lib64/libc.so.6(+0x35b5) [0x7f4a990e85b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4a990e8668] -/usr/sbin/llama-bench() [0x4189d5] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD 
open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f388e92f465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f388e92f83b] +/lib64/libggml-base.so.0(+0x16f59) [0x7f388e941f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f388e6d2bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f388e6bcd3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f388e6d2ea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7f388ea02411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7f388eaebfd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f388e94b1d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f38923059d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f38923085b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7f389230ef95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f3892310aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403989] +/lib64/libc.so.6(+0x35b5) [0x7f388e3a35b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f388e3a3668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost -✖ ! 
[vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx32768 failed (exit 0) +✖ [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx32768 failed (exit 134) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx65536.log index fe396cf..419a93f 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx65536.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f41f07d5465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f41f07d583b] -/lib64/libggml-base.so.0(+0x16f19) [0x7f41f07e7f19] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f41f0578bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f41f0562d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f41f0578ea8] -/lib64/libggml-vulkan.so.0(+0x1728d) [0x7f41f08aa28d] -/lib64/libggml-vulkan.so.0(+0x10a410) [0x7f41f099d410] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f41f07f1192] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f41f445fc70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f41f4462255] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f41f446898f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f41f446a32e] -/usr/sbin/llama-bench() [0x40663b] -/usr/sbin/llama-bench() [0x4038b9] -/lib64/libc.so.6(+0x35b5) [0x7f41f02495b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f41f0249668] -/usr/sbin/llama-bench() [0x404e65] +ggml_vulkan: 0 = AMD Radeon 
8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f25ade02465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f25ade0283b] +/lib64/libggml-base.so.0(+0x16f59) [0x7f25ade14f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f25adba5bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f25adb8fd3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f25adba5ea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7f25aded5411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7f25adfbefd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f25ade1e1d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f25b17d89d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f25b17db5b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7f25b17e1f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f25b17e3aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403989] +/lib64/libc.so.6(+0x35b5) [0x7f25ad8765b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f25ad876668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost -✖ ! 
[vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx65536 failed (exit 0) +✖ [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx65536 failed (exit 134) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log index 22f733b..346cbd3 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 164.20 ± 0.55 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.94 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 255.05 ± 0.70 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.83 ± 0.00 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log index d95eba5..b52e9f5 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 
Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 72.41 ± 0.21 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.71 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 79.54 ± 0.26 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.62 ± 0.00 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log index 8439df5..47d2c14 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 42.48 ± 0.11 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.81 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 35.49 ± 0.19 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.74 ± 0.00 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index bf06839..0000000 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 333.46 ± 1.20 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.99 ± 0.04 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index c4e622b..0000000 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices 
(Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 307.47 ± 0.55 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.60 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log deleted file mode 100644 index 44934bf..0000000 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 267.67 ± 1.70 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.18 ± 0.07 | - -build: f53577432 (8942) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index b1d3089..0000000 --- 
a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 304.86 ± 0.24 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.79 ± 0.04 | - -build: f53577432 (8942) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index b578d21..0000000 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 249.66 ± 0.90 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.42 ± 0.07 | - -build: ab6120cde (8997) diff --git 
a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1.log similarity index 62% rename from benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log rename to benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1.log index 3e633b5..f14062e 100644 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 259.71 ± 1.51 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.74 ± 0.00 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 275.89 ± 1.53 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 14.85 ± 0.04 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log similarity index 77% rename from 
benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log index 9cd5891..39c6022 100644 --- a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 302.29 ± 0.44 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.58 ± 0.01 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 382.38 ± 5.98 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.40 ± 0.07 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log similarity index 77% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log rename to 
benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log index fa48132..68129b7 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 250.79 ± 0.32 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.03 ± 0.04 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 348.10 ± 0.16 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 13.96 ± 0.16 | -build: 7957de9dc (8645) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 86597fe..973f984 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 261.51 ± 1.50 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.63 ± 0.00 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 272.39 ± 5.98 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 14.50 ± 0.06 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index 7d5f443..b3d6c5b 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 323.83 ± 0.28 | -| 
nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.34 ± 0.01 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 379.64 ± 0.46 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.15 ± 0.16 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log index 9198c57..f1698ec 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 290.67 ± 0.26 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.11 ± 0.05 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 333.18 ± 0.36 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 13.88 ± 0.17 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git 
a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log index 229bdfb..3041cd7 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 232.54 ± 0.27 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.81 ± 0.00 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 239.02 ± 1.74 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 14.43 ± 0.02 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 0446b85..a1f4485 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave 
Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.15 ± 0.23 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.34 ± 0.61 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 353.69 ± 0.07 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.32 ± 0.08 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log index 97ba25a..e8ffdc4 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 242.09 ± 0.33 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.47 ± 0.10 | +| nemotron_h_moe 
120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 326.04 ± 0.10 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.05 ± 0.19 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 7fc882e..6f0b3ce 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 138.78 ± 0.58 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.45 ± 0.09 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 160.64 ± 0.60 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.44 ± 0.10 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log 
b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index 83f7a70..66e01f5 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 104.96 ± 0.02 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.66 ± 0.21 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 120.39 ± 0.10 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.67 ± 0.16 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log index 42c0726..47862c7 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log +++ 
b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 85.09 ± 0.16 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 13.21 ± 0.04 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 96.89 ± 0.18 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 13.30 ± 0.25 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log index 412da30..fd3678b 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) 
(radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 191.97 ± 8.53 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.70 ± 0.35 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 270.56 ± 2.02 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.86 ± 0.06 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log index e3a63f3..1713890 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | 
Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 185.90 ± 0.18 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.21 ± 0.38 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 242.77 ± 0.08 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.30 ± 0.15 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log index f07472e..2c645a7 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 174.22 ± 0.19 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 13.55 ± 0.03 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 216.34 ± 0.78 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 
GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 13.69 ± 0.35 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 2296b6a..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1498.50 ± 6.83 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.29 ± 0.67 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 608ce77..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 213.71 ± 0.11 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 
30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.47 ± 0.03 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 51d8bed..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 110.56 ± 0.10 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 22.59 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log deleted file mode 100644 index 7f4faac..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1149.95 ± 8.12 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 
| 67.57 ± 0.10 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index ac5a4b3..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 194.74 ± 0.07 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.36 ± 0.13 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 8280acb..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 102.26 ± 0.13 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 
| 0 | tg32 @ d65536 | 22.53 ± 0.07 | - -build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log deleted file mode 100644 index 5011d54..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1167.77 ± 7.34 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 65.06 ± 0.02 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 724b2a2..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 291.69 ± 0.26 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.81 ± 0.02 | - -build: 3f8752b55 (8743) diff --git 
a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log deleted file mode 100644 index fa76777..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 159.40 ± 0.10 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 22.26 ± 0.17 | - -build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log deleted file mode 100644 index acd091b..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1185.86 ± 2.21 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.95 ± 0.01 | - -build: 3f8752b55 (8743) diff --git 
a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index c0601b2..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 203.21 ± 0.11 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.77 ± 0.02 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log deleted file mode 100644 index ec90521..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 102.96 ± 0.05 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 
23.26 ± 0.12 | - -build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log deleted file mode 100644 index f60bdd6..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 882.68 ± 3.83 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.48 ± 0.06 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index ec6b25c..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.69 ± 0.03 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 
| 0 | tg32 @ d32768 | 28.03 ± 0.03 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log deleted file mode 100644 index 7744f9d..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 60.42 ± 0.04 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 17.44 ± 0.01 | - -build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log deleted file mode 100644 index 7d5e7d4..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1145.07 ± 8.85 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 
30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 82.16 ± 3.05 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 36ea50e..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 194.38 ± 0.33 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.09 ± 0.05 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log deleted file mode 100644 index 210de70..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 63.65 ± 
0.38 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 24.54 ± 0.02 | - -build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 5b9a82c..0000000 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 418.75 ± 3.97 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.62 ± 0.03 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index c456266..0000000 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | 
pp2048 @ d32768 | 207.29 ± 0.79 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.34 ± 2.04 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log deleted file mode 100644 index b9f6f0d..0000000 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 314.59 ± 2.13 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.96 ± 0.15 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 2143a5b..0000000 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ 
d32768 | 235.56 ± 9.94 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.82 ± 0.08 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 4b7d92a..0000000 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 152.12 ± 0.13 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.21 ± 0.22 | - -build: ab6120cde (8997) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1.log similarity index 63% rename from benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log rename to benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1.log index 6190e09..5fb9e16 100644 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, 
VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 305.11 ± 1.38 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.18 ± 0.00 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 336.69 ± 1.88 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 20.12 ± 0.02 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log similarity index 78% rename from benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log index 3f12afa..16389cd 100644 --- a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| 
qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 246.59 ± 0.45 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.49 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 276.15 ± 4.08 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.73 ± 0.08 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log similarity index 78% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log rename to benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log index db2483b..5cc2573 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 170.79 ± 0.22 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.19 ± 0.06 | +| qwen35moe 122B.A10B 
Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 192.28 ± 2.56 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.74 ± 0.27 | -build: 7957de9dc (8645) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 23f86cc..aaa49bd 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 316.73 ± 1.27 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 18.86 ± 0.02 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 336.02 ± 1.65 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.73 ± 0.02 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index 4842808..64b1ace 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ 
b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 275.81 ± 1.62 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.90 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 300.11 ± 0.10 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.47 ± 0.15 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log index 81ec5de..dc3af70 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | 
-------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 187.01 ± 3.13 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 16.91 ± 0.09 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 202.62 ± 1.80 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 17.30 ± 0.22 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log index dbdb4c2..9dee21d 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 318.17 ± 2.18 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.57 ± 0.14 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 338.52 ± 3.41 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 20.09 ± 0.02 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log 
b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 5607dd4..3cda8ac 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 223.61 ± 0.83 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.32 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 272.63 ± 0.34 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.69 ± 0.18 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log index 8c2d444..7762da5 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 138.80 ± 0.17 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 17.26 ± 0.15 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 184.62 ± 0.08 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 17.52 ± 0.15 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index f1a164d..49290c8 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 182.98 ± 1.82 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.33 ± 0.00 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 202.91 ± 
1.91 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.42 ± 0.01 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index c587a4f..68d82e3 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.84 ± 0.14 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 18.77 ± 0.04 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 115.36 ± 0.12 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 19.04 ± 0.02 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log index 2e87f68..a20596f 100644 --- 
a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 70.58 ± 0.04 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 16.94 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 81.21 ± 0.05 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 17.29 ± 0.02 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log index f531091..9fc261b 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV 
GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 241.15 ± 7.43 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.75 ± 0.24 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 308.78 ± 4.29 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 22.30 ± 0.01 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log index 502caf7..3a01aae 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 202.99 ± 0.41 | -| qwen35moe 122B.A10B Q5_K 
- Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 20.09 ± 0.02 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 224.82 ± 0.40 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 20.38 ± 0.02 | -build: 3f8752b55 (8743) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log index 51a6b87..2f6d40c 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 156.41 ± 0.22 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 18.78 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 165.14 ± 0.48 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 18.99 ± 0.01 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git 
a/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1.log new file mode 100644 index 0000000..cf438be --- /dev/null +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 1 | 0 | pp512 | 367.47 ± 2.44 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 1 | 0 | tg128 | 6.51 ± 0.00 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log similarity index 62% rename from benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log index 8d95301..53dcfef 100644 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.08 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.08 ± 0.00 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 197.70 ± 2.94 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.14 ± 0.01 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log similarity index 62% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log rename to benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log index 80cc879..7c79694 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 8.12 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 1.48 ± 0.00 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 106.30 ± 1.69 | +| qwen35 27B 
Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.83 ± 0.00 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..174cd2e --- /dev/null +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 1 | 0 | pp512 | 364.12 ± 1.62 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 1 | 0 | tg128 | 6.48 ± 0.00 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log similarity index 62% rename from benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index 8999b32..6cbb4ec 100644 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test 
| t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.56 ± 0.07 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.09 ± 0.00 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 172.59 ± 0.84 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.13 ± 0.01 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log similarity index 62% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log rename to benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log index ddb75a1..6c739d4 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 8.16 ± 0.08 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 1.58 ± 0.00 | +| qwen35 27B 
Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 93.78 ± 1.21 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.81 ± 0.01 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1.log similarity index 60% rename from benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log rename to benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1.log index 35551cf..103bef0 100644 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.37 ± 0.10 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.96 ± 0.00 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 1 | 0 | pp512 | 371.42 ± 4.05 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 1 | 0 | tg128 | 6.51 ± 0.00 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log similarity index 75% rename from benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log 
rename to benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index 4f22b35..ad9e34b 100644 --- a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.07 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 190.65 ± 0.72 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.14 ± 0.01 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log similarity index 75% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log rename to benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log index a4c2062..f38be6a 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 7.94 ± 0.04 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 1.49 ± 0.00 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 105.08 ± 0.72 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.82 ± 0.01 | -build: 7957de9dc (8645) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..b8d023a --- /dev/null +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | pp512 | 80.22 ± 0.05 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | tg128 | 5.88 ± 0.00 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..3e31791 --- /dev/null +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 
+1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 60.84 ± 0.01 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.42 ± 0.00 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..1234ef9 --- /dev/null +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 48.97 ± 0.01 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.03 ± 0.00 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..98057f8 --- /dev/null +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) 
(radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | pp512 | 274.97 ± 0.46 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | tg128 | 6.30 ± 0.00 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..72dde4a --- /dev/null +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 123.12 ± 0.56 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.92 ± 0.00 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..d2fffe4 --- /dev/null +++ b/benchmark/results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | 
matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 63.14 ± 0.36 | +| qwen35 27B Q8_0 | 33.31 GiB | 27.32 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.60 ± 0.00 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index ffdd0be..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 513.08 ± 3.80 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.46 ± 0.06 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 1800190..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| 
------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 379.44 ± 1.48 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.65 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log deleted file mode 100644 index f836f61..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 509.14 ± 3.56 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.98 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index d1591bf..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 391.47 ± 0.29 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.88 ± 0.01 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index fe7a028..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 264.06 ± 0.65 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 20.14 ± 0.01 | - -build: ab6120cde (8997) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1.log similarity index 64% rename from benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1.log index 388df5f..a792f4c 100644 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, 
gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 512.17 ± 3.01 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.69 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 525.94 ± 13.39 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 26.01 ± 0.00 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log similarity index 79% rename from benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log index b6be3f4..bf2df28 100644 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 
2048 | 1 | 0 | pp2048 @ d32768 | 423.84 ± 0.73 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.64 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 417.86 ± 0.42 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 23.91 ± 0.01 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log similarity index 79% rename from benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log index cea0bad..f49abb1 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 296.43 ± 1.09 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.60 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 322.65 ± 7.40 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 
22.20 ± 0.01 | -build: 7957de9dc (8645) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index 7f7683b..2de5d30 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 538.42 ± 12.90 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 21.91 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 573.71 ± 4.26 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 25.07 ± 0.00 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 1393959..2711ec9 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | 
model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 412.11 ± 0.60 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.59 ± 0.09 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 426.65 ± 0.74 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 23.77 ± 0.01 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log index de6b985..54d9d88 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 326.10 ± 1.55 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.05 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 343.79 ± 1.31 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 
| 2048 | 1 | 0 | tg32 @ d65536 | 22.22 ± 0.01 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log index f3a2f0e..57102f9 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,6 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB -main: error: failed to load model '/home/kyuz0/models/qwen-3.6-35b-a3b/BF16/Qwen3.6-35B-A3B-BF16-00001-of-00002.gguf' +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -✖ ! 
[rocm7-nightlies] Qwen3.6-35B-A3B-BF16-00001-of-00002__fa1 failed (exit 0) +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 528.97 ± 5.27 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 25.91 ± 0.01 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index c843485..e82db29 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,6 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB -main: error: failed to load model '/home/kyuz0/models/qwen-3.6-35b-a3b/BF16/Qwen3.6-35B-A3B-BF16-00001-of-00002.gguf' +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -✖ ! 
[rocm7-nightlies] Qwen3.6-35B-A3B-BF16-00001-of-00002__fa1 __longctx32768 failed (exit 0) +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 409.92 ± 1.94 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 23.96 ± 0.44 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log index a164cdd..f21d85e 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 256.49 ± 1.26 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 20.93 ± 0.06 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 314.48 ± 1.45 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 22.60 ± 0.02 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 6a5a268..e538e85 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ 
b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.43 ± 0.23 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 11.55 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.89 ± 0.24 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 11.60 ± 0.03 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 028a15c..8193e40 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap 
| test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 98.18 ± 0.09 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.94 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 100.51 ± 0.04 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.93 ± 0.01 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log index dd4c0ad..dd5ed27 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 82.44 ± 0.12 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 10.40 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 85.86 ± 0.08 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 
34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 10.46 ± 0.01 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index 9cb2ce7..dca1ef1 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 316.59 ± 1.87 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 10.80 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 328.40 ± 1.64 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 10.68 ± 0.01 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index b336a9a..72bffa4 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 
8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 267.07 ± 0.91 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.37 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 275.80 ± 0.89 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.23 ± 0.00 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log index 4f99e4f..a611cbf 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 226.78 ± 1.49 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 10.05 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 241.42 ± 0.58 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 9.94 ± 0.02 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index d480c06..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1359.62 ± 5.74 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.34 ± 0.43 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index fc7af7d..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | 
t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 670.26 ± 1.77 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.42 ± 0.04 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log deleted file mode 100644 index 195ecab..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1089.21 ± 5.82 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 49.27 ± 0.08 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 5ced017..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 703.58 ± 0.51 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.08 ± 0.33 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 3df355f..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 460.71 ± 1.18 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.40 ± 0.24 | - -build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1.log new file mode 100644 index 0000000..0b531cb --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 
35.51 B | ROCm | 99 | 1 | 0 | pp512 | 1120.00 ± 8.54 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 1 | 0 | tg128 | 51.53 ± 0.16 | + +build: 1a68ec937 (9193) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log similarity index 62% rename from benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log index 15f4159..50b9233 100644 --- a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 669.09 ± 1.13 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.19 ± 0.03 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 746.60 ± 0.16 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 43.84 ± 0.36 | -build: 2405d59cb (8577) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log 
similarity index 62% rename from benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log index 983e08c..de749bd 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 412.60 ± 0.33 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.90 ± 0.02 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 493.36 ± 0.38 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 38.39 ± 0.27 | -build: 7957de9dc (8645) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log index 0b2707e..14ac0c4 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave 
Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1067.33 ± 6.85 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.23 ± 0.11 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 1 | 0 | pp512 | 1116.91 ± 7.21 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 1 | 0 | tg128 | 50.32 ± 0.12 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log index e6f9da3..5fe75e6 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 769.99 ± 2.79 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.91 ± 0.49 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 807.33 ± 0.34 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | 
tg32 @ d32768 | 43.47 ± 0.44 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log index 676bb76..19ebddf 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 473.84 ± 2.27 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 36.70 ± 0.97 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 547.71 ± 1.13 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 38.47 ± 0.41 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log index 083496a..84b8cc0 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB 
+ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1091.62 ± 10.45 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 49.10 ± 0.15 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 1 | 0 | pp512 | 1085.14 ± 11.75 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 1 | 0 | tg128 | 51.47 ± 0.18 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log index 382213d..427231d 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 664.89 ± 1.48 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 42.73 
± 0.65 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 716.94 ± 1.11 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 44.45 ± 0.70 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log index 222c0d8..c617aae 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 432.39 ± 0.18 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 38.08 ± 0.59 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 468.89 ± 0.65 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 39.30 ± 0.58 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log index ad0830a..a6f017e 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: 
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 663.94 ± 2.80 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 57.13 ± 0.04 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | pp512 | 696.54 ± 2.95 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | tg128 | 59.57 ± 0.07 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log index beda67a..45ebe64 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 292.49 ± 0.18 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 43.97 ± 0.06 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 319.31 ± 0.43 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 45.30 ± 0.20 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log index 122b511..89c4ad0 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 183.42 ± 0.98 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 36.62 ± 0.05 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 207.54 ± 0.69 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 36.74 ± 0.04 | -build: ab6120cde (8997) 
+build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log index fea8098..c73d477 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1045.16 ± 5.79 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 58.82 ± 0.08 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | pp512 | 1113.64 ± 7.57 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | tg128 | 60.43 ± 0.06 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log index e344a93..6add865 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | 
matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 682.80 ± 1.45 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 48.77 ± 0.07 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 715.52 ± 1.18 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.21 ± 0.38 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log index 44d692e..0501d15 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 507.32 ± 1.33 | 
-| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 42.75 ± 0.05 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 524.70 ± 1.20 | +| qwen35moe 35B.A3B Q4_K - Medium | 21.27 GiB | 35.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 43.23 ± 0.23 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 42341b2..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1222.77 ± 2.88 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 43.72 ± 0.08 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 14a3929..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 544.04 ± 1.79 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.70 ± 0.21 | - -build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 8f2da45..0000000 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 414.74 ± 1.74 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.74 ± 0.02 | - -build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1.log similarity index 64% rename from benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1.log index 06efdb6..1248374 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 
MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1043.12 ± 46.10 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 44.53 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1093.92 ± 7.09 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 46.18 ± 0.01 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log similarity index 79% rename from benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log index d92138b..058eb53 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 653.49 ± 0.18 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.21 ± 0.02 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | 
ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 728.92 ± 0.14 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.00 ± 0.01 | -build: 7957de9dc (8645) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log similarity index 79% rename from benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log index 6b183a9..30aab69 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 414.31 ± 0.61 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.30 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 486.17 ± 0.85 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.45 ± 0.05 | -build: 7957de9dc (8645) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log index 1c3f7f0..a5eaa51 100644 --- 
a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1049.22 ± 7.25 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 43.10 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1095.53 ± 11.56 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 45.13 ± 0.01 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index a75e2df..781378f 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 
758.76 ± 0.99 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.70 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 787.54 ± 0.45 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.88 ± 0.00 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log index 39a638f..5a7dfbf 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 515.64 ± 0.13 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.53 ± 0.03 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 537.23 ± 1.72 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.72 ± 0.04 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log index 637ab39..f08b815 100644 --- 
a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1036.89 ± 9.24 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 44.20 ± 0.00 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1088.56 ± 10.42 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 46.21 ± 0.01 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index 104e4f9..8f7c6b6 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | 
-------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 510.38 ± 2.64 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.18 ± 1.71 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 704.46 ± 1.78 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.54 ± 0.21 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log index 464aae3..f4e0208 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 375.11 ± 2.14 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.02 ± 0.03 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 462.61 ± 1.15 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 36.31 ± 0.01 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 
1ed9fe0..2107ef3 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 576.25 ± 2.16 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 45.25 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 640.78 ± 2.08 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 46.53 ± 0.07 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index e078197..0055266 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: 
KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 273.77 ± 0.30 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 36.71 ± 0.08 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 307.07 ± 1.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.93 ± 0.07 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log index 9224ec2..52334ce 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 175.02 ± 1.28 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 31.29 ± 0.05 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 202.18 ± 0.79 | +| 
qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 32.35 ± 0.04 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log index 78c10fa..d6c6942 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 893.79 ± 4.74 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 46.05 ± 0.07 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1045.00 ± 7.30 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 46.33 ± 0.02 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index a6902ba..146be11 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) 
(radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 610.84 ± 1.99 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 39.67 ± 0.15 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 682.80 ± 0.96 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 40.08 ± 0.17 | -build: f53577432 (8942) +build: 1a68ec937 (9193) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log index cc7bef5..6724eb3 100644 --- a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 
99 | 1 | 0 | pp2048 @ d65536 | 468.83 ± 1.17 | -| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 35.46 ± 0.04 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 504.87 ± 1.05 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 35.80 ± 0.05 | -build: ab6120cde (8997) +build: 1a68ec937 (9193) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index ad4fb02..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 836.74 ± 6.15 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.95 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 2cd4471..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | 
---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 619.85 ± 6.10 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.38 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index c5eeaa8..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 408.04 ± 3.58 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.52 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log deleted file mode 100644 index ba410bb..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: 
| --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 823.65 ± 83.54 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.67 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 27ef16c..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 628.14 ± 5.88 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.15 ± 0.01 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 5f077bd..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 403.29 ± 1.83 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.35 ± 0.19 | - -build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log index d61700a..33baec1 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 796.06 ± 126.76 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.68 ± 0.00 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 909.04 ± 7.54 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 24.64 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log index 063ea41..132b7e9 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 
32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 628.20 ± 5.71 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.12 ± 0.00 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 667.44 ± 3.68 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.59 ± 0.01 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log index 4909514..c581b57 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 426.61 ± 3.98 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.55 ± 0.01 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 446.88 ± 2.91 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 20.79 ± 0.00 | -build: 8e1f9d083 (9112) +build: 
0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log index 27617ae..0b67b9c 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 839.13 ± 7.61 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 20.75 ± 0.00 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 893.46 ± 6.97 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 23.37 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 38d2b4a..7a3983a 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | 
fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 646.67 ± 11.22 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.86 ± 0.01 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 691.94 ± 9.26 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.39 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log index 898ec96..ac4f51b 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 441.14 ± 2.12 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 17.76 ± 0.01 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 470.28 ± 3.56 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 20.66 ± 0.00 | -build: ab6120cde 
(8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log index bdb11b0..1a14f0c 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 844.42 ± 19.66 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.69 ± 0.00 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 870.72 ± 80.56 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 24.65 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index cf1a239..49f28c3 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | 
--------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 546.66 ± 5.96 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.11 ± 0.07 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 638.41 ± 6.72 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.58 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log index 95707ae..351e768 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 345.70 ± 0.13 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.16 ± 0.47 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 421.88 ± 0.33 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 20.88 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log 
index 1a94edd..ce565f3 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 117.96 ± 0.28 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 16.18 ± 0.02 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 123.59 ± 0.22 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 18.85 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 62d20bc..80e1308 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | 
shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 58.94 ± 0.07 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 15.26 ± 0.01 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 60.07 ± 0.05 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 16.44 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log index 383b4a9..134402a 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 39.57 ± 0.04 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 14.10 ± 0.02 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | 
Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 39.84 ± 0.03 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 15.10 ± 0.01 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log index 882cb85..6c4a2a0 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 376.52 ± 7.37 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 14.74 ± 0.01 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 587.64 ± 6.59 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 18.60 ± 0.02 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 98e4674..4b47ff7 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ 
b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 351.87 ± 1.49 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.71 ± 0.06 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 428.06 ± 1.23 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 16.99 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log index 034abc4..02195b8 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | 
backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 281.32 ± 0.70 | -| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 13.15 ± 0.03 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 331.82 ± 0.61 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 16.12 ± 0.02 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 3363194..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1608.01 ± 5.26 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 47.13 ± 0.29 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index ba51449..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 689.30 ± 12.21 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.14 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 2ce4c72..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 446.29 ± 0.87 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.16 ± 0.03 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log deleted file mode 100644 index e24d90b..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 
126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1299.90 ± 11.12 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.59 ± 0.02 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index e2f02bd..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 693.48 ± 4.74 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.76 ± 0.16 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 1bb9c55..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap 
| test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 453.92 ± 5.18 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.85 ± 0.16 | - -build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log index 88d1ad9..b2a616e 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1301.66 ± 10.69 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.42 ± 0.00 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1329.04 ± 9.98 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 48.46 ± 0.11 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log index 00152b7..e55bad8 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: 
found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 693.56 ± 10.92 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.83 ± 0.15 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 725.46 ± 9.61 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.84 ± 0.15 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log index dd17c6d..64b339e 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 450.75 ± 0.77 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.84 ± 0.16 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 
471.30 ± 1.61 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.61 ± 0.14 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log index 20ea166..0c61a87 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1288.38 ± 10.85 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 45.58 ± 0.01 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1334.91 ± 9.90 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 47.74 ± 0.07 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log index aad4700..65cf0f1 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD 
Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 721.84 ± 10.97 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.17 ± 0.02 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 761.50 ± 12.52 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.29 ± 0.26 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log index ccd7682..dbf422d 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 469.64 ± 3.17 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.31 ± 0.25 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | 
pp2048 @ d65536 | 499.49 ± 1.70 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.18 ± 0.24 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log index 1db2c43..ce3bc9d 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1219.74 ± 15.04 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.57 ± 0.01 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1194.14 ± 6.99 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 48.47 ± 0.12 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log index 8f086cb..efe677e 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa 
| mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 600.42 ± 7.55 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.88 ± 0.01 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 674.07 ± 6.75 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.68 ± 0.38 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log index 7a8df40..506b436 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 374.22 ± 0.13 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.77 ± 0.32 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 437.20 ± 0.38 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.52 ± 0.34 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git 
a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log index 0699b0b..b8addde 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 720.78 ± 2.94 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 50.21 ± 0.06 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 765.18 ± 2.13 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 52.04 ± 0.02 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 5ec6aae..515bd29 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix 
cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 100.79 ± 0.03 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 35.41 ± 0.01 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 101.11 ± 0.10 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 36.34 ± 0.06 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log index df37dff..951c510 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 54.90 ± 0.12 | -| 
gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 29.52 ± 0.03 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 54.46 ± 0.03 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 30.34 ± 0.05 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log index ef94402..7c46ef6 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 1213.78 ± 7.36 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 53.04 ± 0.10 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 1324.66 ± 8.47 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 54.67 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log index 6ac9013..cf99b7e 100644 --- 
a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 638.76 ± 1.76 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 40.93 ± 0.09 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 682.46 ± 2.10 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 42.40 ± 0.03 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log index 69589cc..3f5ede9 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared 
memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 445.00 ± 0.08 | -| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 36.64 ± 0.06 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 469.95 ± 0.40 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 37.46 ± 0.07 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 48ba107..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1524.44 ± 7.11 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.68 ± 0.02 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 48432ce..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 
+0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 689.36 ± 8.08 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.67 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index c26efe2..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 436.86 ± 3.98 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 31.55 ± 0.72 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log deleted file mode 100644 index 64550ee..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S 
Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1272.69 ± 74.50 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.31 ± 0.01 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 9ed35f3..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 696.25 ± 7.26 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.95 ± 0.77 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index dc7f2ce..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | 
n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 452.10 ± 0.82 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 29.87 ± 3.28 | - -build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log index b8fb75e..ef920f8 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1301.93 ± 18.07 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.35 ± 0.03 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1354.00 ± 8.88 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 42.35 ± 0.01 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log index d15b313..640a540 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 
126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 695.00 ± 7.26 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.41 ± 0.01 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 725.86 ± 11.87 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.01 ± 0.02 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log index c68c3d3..f44d2de 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 447.63 ± 3.37 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 31.76 ± 0.04 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 471.81 ± 3.32 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ 
d65536 | 32.14 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log index 5fbd898..b225ff8 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1303.32 ± 8.77 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 38.81 ± 0.01 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1356.47 ± 9.00 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.28 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index 84431bb..8d13e96 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | 
test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 727.92 ± 8.62 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.52 ± 0.35 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 769.66 ± 14.85 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.66 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log index c1f0b33..1c943d4 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 451.48 ± 1.29 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 30.76 ± 0.03 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 504.88 ± 0.76 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.09 ± 0.02 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git 
a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log index 834db9b..d2d1d2d 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1294.40 ± 31.54 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.09 ± 0.01 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1342.08 ± 8.02 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 42.32 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index 1203ea4..da10573 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ 
d32768 | 596.78 ± 7.98 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.35 ± 0.00 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 692.34 ± 8.31 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.00 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log index 9ee129b..b12cf40 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 363.83 ± 2.85 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 31.68 ± 0.02 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 447.95 ± 0.85 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.16 ± 0.02 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index c5a3b18..13e1450 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ 
b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 589.20 ± 1.79 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 43.18 ± 0.07 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 669.11 ± 1.51 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 43.93 ± 0.02 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 9b69152..2284da8 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | 
---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 97.70 ± 0.10 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 31.82 ± 0.05 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 99.13 ± 0.12 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 32.25 ± 0.02 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log index 7fccbf2..64467df 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 54.04 ± 0.08 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 26.65 ± 0.03 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 53.86 ± 0.04 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 27.52 ± 0.03 | -build: ab6120cde (8997) +build: 0253fb21f 
(9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log index 4ab7368..b20277c 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 944.96 ± 19.24 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 43.58 ± 1.06 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 1291.45 ± 8.95 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 45.48 ± 0.03 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index fd0cc70..0ee5b2d 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD 
Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 579.88 ± 0.09 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 35.52 ± 0.07 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 672.93 ± 0.29 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 36.81 ± 0.04 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log index 7f18752..9bf0690 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 415.94 ± 0.55 | -| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 31.99 ± 0.06 | +| 
gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 465.71 ± 0.32 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 32.90 ± 0.01 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 7870940..0000000 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 380.58 ± 1.49 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.49 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 9ff1bae..0000000 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ 
d32768 | 150.63 ± 1.53 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.17 ± 0.03 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 1ab9d0f..0000000 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 96.84 ± 0.57 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.07 ± 0.02 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log deleted file mode 100644 index 64f36dd..0000000 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 377.93 ± 2.48 | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.44 ± 0.00 
| - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index d58c607..0000000 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.11 ± 1.83 | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.18 ± 0.01 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 61898ee..0000000 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 98.23 ± 0.26 | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.08 ± 0.01 | - -build: ab6120cde (8997) diff --git 
a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log index 9ea45f7..ea11a28 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 376.47 ± 1.77 | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.44 ± 0.00 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 428.07 ± 3.61 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.55 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log index d6a45c5..0f9ed06 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ 
d32768 | 152.54 ± 2.43 | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.18 ± 0.00 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.20 ± 1.91 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.23 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log index 982b824..db4d5ee 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 98.53 ± 0.11 | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.08 ± 0.02 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 108.99 ± 0.35 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.13 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log index e55d3f2..0548fac 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 
+1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 360.84 ± 1.60 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.30 ± 0.00 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 410.15 ± 2.16 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.44 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 9885e65..53ababf 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 154.86 ± 2.16 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.14 ± 0.01 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | 
ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 172.49 ± 2.42 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.21 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log index 3fdc9fe..c1c545d 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 101.08 ± 0.13 | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 2.94 ± 0.02 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 114.16 ± 0.63 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.12 ± 0.01 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log index 1b9e740..8aab84e 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total 
VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 373.84 ± 0.93 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.41 ± 0.00 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 423.05 ± 6.41 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.53 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index 1c1e2dd..d141254 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 139.53 ± 1.98 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.16 ± 0.05 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.77 ± 2.07 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.25 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git 
a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log index 125b805..66fea99 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 83.27 ± 0.13 | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.04 ± 0.00 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 102.83 ± 0.08 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.14 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 2cf8a6a..f324469 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f2a944d5465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f2a944d582b] 
-/lib64/libggml-base.so.0(+0x16ed9) [0x7f2a944e7ed9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f2a93c4bbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f2a93c35d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f2a93c4bea8] -/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f2a945a53b2] -/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f2a946955d0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f2a944f1112] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f2a9810c530] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f2a9810eb05] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f2a9811526f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f2a98116bee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416ec6] -/lib64/libc.so.6(+0x35b5) [0x7f2a9391c5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2a9391c668] -/usr/sbin/llama-bench() [0x4189d5] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7ff75610f465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ff75610f83b] +/lib64/libggml-base.so.0(+0x16f59) [0x7ff756121f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7ff755eb2bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7ff755e9cd3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7ff755eb2ea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7ff7561e2411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7ff7562cbfd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7ff75612b1d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7ff759ae59d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7ff759ae85b5] 
+/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7ff759aeef95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7ff759af0aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403c06] +/lib64/libc.so.6(+0x35b5) [0x7ff755b835b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7ff755b83668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! [vulkan_amdvlk] gemma-4-31B-it-BF16-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 3a53be1..e41f6f1 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f7c4a4ab465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f7c4a4ab82b] -/lib64/libggml-base.so.0(+0x16ed9) [0x7f7c4a4bded9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f7c49c21bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f7c49c0bd3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f7c49c21ea8] -/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f7c4a57b3b2] -/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f7c4a66b5d0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f7c4a4c7112] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f7c4e0e2530] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f7c4e0e4b05] 
-/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f7c4e0eb26f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f7c4e0ecbee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416c49] -/lib64/libc.so.6(+0x35b5) [0x7f7c498f25b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f7c498f2668] -/usr/sbin/llama-bench() [0x4189d5] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fb7d3fe1465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb7d3fe183b] +/lib64/libggml-base.so.0(+0x16f59) [0x7fb7d3ff3f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fb7d3d84bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fb7d3d6ed3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fb7d3d84ea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7fb7d40b4411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7fb7d419dfd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fb7d3ffd1d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fb7d79b79d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fb7d79ba5b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7fb7d79c0f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fb7d79c2aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403989] +/lib64/libc.so.6(+0x35b5) [0x7fb7d3a555b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb7d3a55668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! 
[vulkan_amdvlk] gemma-4-31B-it-BF16-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log index 6a066ed..87f932a 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7fa1d7fcc465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa1d7fcc83b] -/lib64/libggml-base.so.0(+0x16f19) [0x7fa1d7fdef19] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7fa1d7d6fbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fa1d7d59d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7fa1d7d6fea8] -/lib64/libggml-vulkan.so.0(+0x1728d) [0x7fa1d80a128d] -/lib64/libggml-vulkan.so.0(+0x10a410) [0x7fa1d8194410] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fa1d7fe8192] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa1dbc56c70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fa1dbc59255] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fa1dbc5f98f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7fa1dbc6132e] -/usr/sbin/llama-bench() [0x40663b] -/usr/sbin/llama-bench() [0x4038b9] -/lib64/libc.so.6(+0x35b5) [0x7fa1d7a405b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa1d7a40668] -/usr/sbin/llama-bench() [0x404e65] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | 
shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f0020d59465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f0020d5983b] +/lib64/libggml-base.so.0(+0x16f59) [0x7f0020d6bf59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f0020afcbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f0020ae6d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f0020afcea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7f0020e2c411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7f0020f15fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f0020d751d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f002472f9d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f00247325b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7f0024738f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f002473aaae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403989] +/lib64/libc.so.6(+0x35b5) [0x7f00207cd5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f00207cd668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! 
[vulkan_amdvlk] gemma-4-31B-it-BF16-00001-of-00002__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log index ac42bfc..348e87f 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 79.53 ± 0.26 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 3.50 ± 0.00 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 135.94 ± 0.46 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 3.50 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 7b3e175..43a29bd 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 
| warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 60.29 ± 0.37 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.24 ± 0.00 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 92.40 ± 1.45 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.25 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log index 8f3488f..20d4186 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 45.96 ± 1.09 | 
-| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 3.06 ± 0.00 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 67.88 ± 1.23 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 3.08 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 41a06c4..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 293.27 ± 0.37 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.50 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 3338698..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - 
Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 136.84 ± 1.27 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 86912e3..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 91.86 ± 0.33 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.51 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log deleted file mode 100644 index 2c3483f..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 310.54 ± 0.73 | -| gemma4 31B Q4_K - Medium | 
17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.52 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 79d530a..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 140.71 ± 1.29 | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.01 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 94ec84e..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 93.57 ± 0.20 | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.52 ± 0.01 | - 
-build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log index 60b508a..0b06c8a 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 309.16 ± 0.31 | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.49 ± 0.00 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 322.69 ± 0.81 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.55 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log index aa2a5d6..e50fce3 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | 
pp2048 @ d32768 | 139.29 ± 1.61 | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.10 ± 0.01 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 150.43 ± 1.59 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.13 ± 0.01 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log index 785a7fd..8ea35cf 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 92.80 ± 0.18 | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.51 ± 0.01 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 101.59 ± 0.41 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.54 ± 0.01 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log index 66aac4c..aff532d 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -1,8 
+1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 309.22 ± 0.13 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.02 ± 0.00 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 317.49 ± 0.94 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.20 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log index d7be7d7..244b954 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 143.91 ± 1.64 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.81 ± 0.00 | +| gemma4 31B Q4_K - 
Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 154.73 ± 1.21 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.95 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log index b9aae5c..3aba455 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 96.98 ± 0.37 | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.26 ± 0.01 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 106.22 ± 0.22 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.38 ± 0.01 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log index e3b7d2d..675c530 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices 
(Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 315.72 ± 0.30 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.51 ± 0.00 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 313.83 ± 2.55 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.55 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log index dd44eae..536aa19 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 130.80 ± 1.45 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.20 ± 0.00 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 142.53 ± 1.40 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.21 ± 0.02 | -build: 3f8752b55 
(8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log index 43b05b9..b4b6fbb 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 84.96 ± 0.07 | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.56 ± 0.02 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 95.32 ± 0.10 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.58 ± 0.02 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log index 2e5a7d2..9544d55 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 
32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 69.12 ± 0.02 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 10.71 ± 0.00 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 70.48 ± 0.02 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 11.32 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 4f6b526..b7bec69 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f0f8a452465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f0f8a45282b] -/lib64/libggml-base.so.0(+0x16ed9) [0x7f0f8a464ed9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f0f89bc8bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f0f89bb2d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f0f89bc8ea8] -/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f0f8a5223b2] -/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f0f8a6125d0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f0f8a46e112] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f0f8e089530] 
-/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f0f8e08bb05] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f0f8e09226f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f0f8e093bee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416ec6] -/lib64/libc.so.6(+0x35b5) [0x7f0f898995b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0f89899668] -/usr/sbin/llama-bench() [0x4189d5] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f74311b6465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f74311b683b] +/lib64/libggml-base.so.0(+0x16f59) [0x7f74311c8f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f7430f59bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f7430f43d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f7430f59ea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7f7431289411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7f7431372fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f74311d21d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f7434b8c9d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f7434b8f5b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7f7434b95f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f7434b97aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403b2b] +/lib64/libc.so.6(+0x35b5) [0x7f7430c2a5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f7430c2a668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! 
[vulkan_amdvlk] gemma-4-31B-it-UD-Q4_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log index 7913e47..b235fe6 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7fbb0ecdb465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fbb0ecdb83b] -/lib64/libggml-base.so.0(+0x16f19) [0x7fbb0ecedf19] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7fbb0ea7ebfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fbb0ea68d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7fbb0ea7eea8] -/lib64/libggml-vulkan.so.0(+0x1728d) [0x7fbb0edb028d] -/lib64/libggml-vulkan.so.0(+0x10a410) [0x7fbb0eea3410] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fbb0ecf7192] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fbb12965c70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fbb12968255] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fbb1296e98f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7fbb1297032e] -/usr/sbin/llama-bench() [0x40663b] -/usr/sbin/llama-bench() [0x403a5b] -/lib64/libc.so.6(+0x35b5) [0x7fbb0e74f5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fbb0e74f668] -/usr/sbin/llama-bench() [0x404e65] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: 
KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fe26f226465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fe26f22683b] +/lib64/libggml-base.so.0(+0x16f59) [0x7fe26f238f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fe26efc9bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fe26efb3d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fe26efc9ea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7fe26f2f9411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7fe26f3e2fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fe26f2421d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fe272bfc9d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fe272bff5b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7fe272c05f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fe272c07aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403b2b] +/lib64/libc.so.6(+0x35b5) [0x7fe26ec9a5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fe26ec9a668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! 
[vulkan_amdvlk] gemma-4-31B-it-UD-Q4_K_XL__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log index a66892a..b85e109 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 244.46 ± 0.27 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 11.02 ± 0.01 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 301.53 ± 0.16 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 11.45 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log index 67bf195..a5684ef 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | 
int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 124.32 ± 1.04 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.78 ± 0.00 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 146.08 ± 1.04 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.09 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log index f764fbf..ec78316 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 76.03 ± 1.52 | -| gemma4 31B Q4_K - Medium | 
17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 7.78 ± 0.00 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 86.61 ± 2.84 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 7.88 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 704cc50..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 282.42 ± 0.40 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.16 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 1835f21..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm 
| 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.60 ± 1.37 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index d77f293..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 90.15 ± 0.22 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.00 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log deleted file mode 100644 index b397b89..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 308.17 ± 0.62 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.16 ± 0.00 | - -build: f53577432 
(8942) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index fbdfc88..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 138.85 ± 1.58 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 7d96053..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 93.92 ± 0.29 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.00 ± 0.00 | - -build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log 
b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log index 8b2bd22..00abf73 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 309.61 ± 0.81 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.15 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 326.41 ± 1.50 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.17 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log index 3e6693a..9981324 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 139.33 ± 1.73 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.25 ± 0.00 | +| gemma4 31B 
Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 150.02 ± 1.17 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log index ea393fd..9305e1c 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 93.30 ± 0.54 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.00 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 100.93 ± 0.14 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.01 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log index 4c792b4..35a3e6a 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 
(0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 303.40 ± 0.23 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.07 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 325.63 ± 1.72 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.14 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index 45ec95f..dd9ae50 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 141.37 ± 1.98 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.18 ± 0.08 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 155.90 ± 2.50 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.27 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git 
a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log index 5ae9fb9..d2a4c4f 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 93.78 ± 0.36 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 4.95 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 106.02 ± 0.86 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.01 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log index 0ef79f1..ce2694a 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | 
-------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 319.75 ± 0.26 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.12 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 333.66 ± 1.84 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.16 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index 513ed3b..0b5532a 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 128.00 ± 1.30 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.30 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 143.80 ± 1.56 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.30 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log index f00e31f..117f557 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log +++ 
b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 81.58 ± 0.14 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 4.99 ± 0.05 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 96.12 ± 0.10 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.03 ± 0.01 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index c3c9edf..7a5c7b9 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 72.26 ± 0.02 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | 
Vulkan | 99 | 1 | 0 | tg128 | 6.28 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 73.59 ± 0.03 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 6.21 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index cc5da6c..7e3a66c 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,23 +1,24 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f8de9476465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8de947682b] -/lib64/libggml-base.so.0(+0x16ed9) [0x7f8de9488ed9] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f8de8becbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f8de8bd6d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f8de8becea8] -/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f8de95463b2] -/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f8de96365d0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f8de9492112] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f8ded0ad530] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f8ded0afb05] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f8ded0b626f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f8ded0b7bee] -/usr/sbin/llama-bench() [0x41a1bb] -/usr/sbin/llama-bench() [0x416ec6] -/lib64/libc.so.6(+0x35b5) [0x7f8de88bd5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8de88bd668] 
-/usr/sbin/llama-bench() [0x4189d5] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f995798c465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f995798c83b] +/lib64/libggml-base.so.0(+0x16f59) [0x7f995799ef59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f995772fbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f9957719d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f995772fea8] +/lib64/libggml-vulkan.so.0(+0x1381f) [0x7f9957a5d81f] +/lib64/libggml-vulkan.so.0(+0x139c0a) [0x7f9957b83c0a] +/lib64/libggml-vulkan.so.0(+0x13a7e1) [0x7f9957b847e1] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f99579a8613] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f995b3629d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f995b3655b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7f995b36bf95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f995b36daae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403c06] +/lib64/libc.so.6(+0x35b5) [0x7f99574005b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f9957400668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! 
[vulkan_amdvlk] gemma-4-31B-it-UD-Q8_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log index f4d2122..14e6d70 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x4465) [0x7f2f37a93465] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f2f37a9383b] -/lib64/libggml-base.so.0(+0x16f19) [0x7f2f37aa5f19] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f2f37836bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f2f37820d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f2f37836ea8] -/lib64/libggml-vulkan.so.0(+0x1728d) [0x7f2f37b6828d] -/lib64/libggml-vulkan.so.0(+0x10a410) [0x7f2f37c5b410] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f2f37aaf192] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f2f3b71dc70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f2f3b720255] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f2f3b72698f] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f2f3b72832e] -/usr/sbin/llama-bench() [0x40663b] -/usr/sbin/llama-bench() [0x403a5b] -/lib64/libc.so.6(+0x35b5) [0x7f2f375075b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2f37507668] -/usr/sbin/llama-bench() [0x404e65] +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: 
KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fcccc1e5465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fcccc1e583b] +/lib64/libggml-base.so.0(+0x16f59) [0x7fcccc1f7f59] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fcccbf88bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fcccbf72d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fcccbf88ea8] +/lib64/libggml-vulkan.so.0(+0x15411) [0x7fcccc2b8411] +/lib64/libggml-vulkan.so.0(+0xfefd0) [0x7fcccc3a1fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fcccc2011d2] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fcccfbbb9d0] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fcccfbbe5b5] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x355) [0x7fcccfbc4f95] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fcccfbc6aae] +/usr/sbin/llama-bench() [0x4066fb] +/usr/sbin/llama-bench() [0x403b2b] +/lib64/libc.so.6(+0x35b5) [0x7fcccbc595b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fcccbc59668] +/usr/sbin/llama-bench() [0x404f35] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! 
[vulkan_amdvlk] gemma-4-31B-it-UD-Q8_K_XL__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log index 9f5b317..5faedde 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 209.20 ± 6.85 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 6.28 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 272.10 ± 0.28 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 6.30 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index 6b46536..fdba976 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat 
+ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 112.72 ± 1.26 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.49 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 141.06 ± 0.76 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.52 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log index 7ae88af..83149b4 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 71.68 ± 0.77 | -| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.02 ± 0.00 | +| gemma4 31B Q8_0 
| 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 88.45 ± 3.85 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.05 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 00caf55..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 945.82 ± 60.17 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.25 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index fcc4071..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ 
d32768 | 311.38 ± 1.36 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.94 ± 7.16 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 76a7342..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 175.99 ± 0.26 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 27.44 ± 0.31 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log deleted file mode 100644 index 6ee9804..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 634.64 ± 3.80 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | 
ROCm | 99 | 1 | 0 | tg128 | 50.70 ± 0.04 | - -build: f53577432 (8942) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 5b9fb05..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 304.96 ± 1.71 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.84 ± 0.12 | - -build: f53577432 (8942) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 97ab7bd..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 175.32 ± 0.50 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ 
d65536 | 24.52 ± 5.66 | - -build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1.log index b9a196c..7b20187 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 635.33 ± 4.03 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.99 ± 0.05 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 625.58 ± 109.36 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.81 ± 0.04 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log index ba6f437..0f79407 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | 
-------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 302.31 ± 0.46 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.94 ± 0.12 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 589.25 ± 0.34 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.32 ± 0.13 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log index 88e55ac..bdf6e2f 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 175.25 ± 0.49 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 27.78 ± 0.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 371.47 ± 2.43 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 26.11 ± 3.50 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log index 58b8b3f..5420599 100644 
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 648.77 ± 4.37 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 49.76 ± 0.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 675.76 ± 4.24 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.02 ± 0.07 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index fc17a5b..2c0c422 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | 
ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 439.58 ± 0.57 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.77 ± 0.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 603.27 ± 1.54 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.02 ± 0.24 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log index f636f42..f9fce77 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 273.87 ± 0.30 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.17 ± 0.38 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 379.11 ± 1.07 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.99 ± 0.14 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log 
index e805b34..627487e 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 641.05 ± 2.79 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.73 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 684.87 ± 3.91 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.48 ± 0.07 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 3360e12..1a544d8 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 305.81 ± 0.24 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 
| 0 | tg32 @ d32768 | 36.03 ± 0.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 653.09 ± 1.74 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.34 ± 0.22 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log index dba40ef..11a79c6 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 177.27 ± 0.45 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 27.69 ± 0.13 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 412.83 ± 4.50 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 28.15 ± 0.13 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index ccde36b..1358ab6 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ 
ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 575.01 ± 3.11 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 51.20 ± 0.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 664.67 ± 3.10 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 51.42 ± 0.05 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index bf15ed0..b2a44e8 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | 
---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 214.22 ± 0.50 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.54 ± 0.04 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 234.02 ± 0.46 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 35.87 ± 0.04 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log index 1aedfef..0822a03 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 129.83 ± 0.04 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 26.17 ± 0.07 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 143.42 ± 0.11 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 27.87 ± 0.02 | -build: ab6120cde (8997) +build: 
0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index 7dfab3a..fd58775 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 635.12 ± 4.23 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.31 ± 0.08 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 719.91 ± 4.36 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.61 ± 0.08 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log index b654b3f..a6e0d21 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | 
int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 290.19 ± 1.03 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 42.28 ± 0.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 307.50 ± 0.37 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 43.01 ± 0.04 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log index 3dd3243..0863e64 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 139.61 ± 0.56 
| -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 35.00 ± 0.05 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 145.92 ± 0.15 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 34.97 ± 0.10 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index 0593f0d..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 2029.48 ± 6.68 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.53 ± 0.11 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 5cf4d57..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B 
| ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 494.19 ± 1.26 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.81 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index 07c8d4a..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 277.16 ± 1.25 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.39 ± 0.04 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log deleted file mode 100644 index da0f1ce..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1631.69 ± 17.03 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.53 ± 0.10 | - -build: f53577432 
(8942) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 0c002a7..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 490.56 ± 2.76 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.69 ± 0.16 | - -build: f53577432 (8942) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index b3b5c05..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 275.76 ± 1.52 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.22 ± 0.34 | - -build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1.log 
b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1.log index 80a1680..aeee0ed 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1636.30 ± 15.57 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.58 ± 0.05 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1786.17 ± 19.42 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.05 ± 0.06 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx32768.log index ac847ab..7581448 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 487.06 ± 3.04 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.73 ± 0.18 | +| gpt-oss 20B MXFP4 MoE | 
11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1003.11 ± 2.13 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 52.17 ± 0.17 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx65536.log index 5d62b87..ba58bce 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 276.26 ± 0.34 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.32 ± 0.08 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 601.63 ± 1.09 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.68 ± 0.09 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log index 5d0098e..62d7a4e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, 
VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1694.78 ± 17.20 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.19 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1787.57 ± 15.77 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.68 ± 0.10 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log index 293a0cb..e35d23c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 730.43 ± 1.15 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.22 ± 0.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1027.43 ± 1.91 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.29 ± 0.37 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git 
a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log index 879519f..2f5474a 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 440.23 ± 0.27 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 47.51 ± 0.29 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 613.02 ± 1.23 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 47.67 ± 0.25 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log index fa84182..80f0ba5 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 
MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1635.73 ± 10.16 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.47 ± 0.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1812.57 ± 16.28 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.03 ± 0.24 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log index 1c1adcb..651c6f0 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 493.84 ± 1.85 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 52.05 ± 0.04 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1124.14 ± 2.36 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 52.23 ± 0.24 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log index af03959..e2f05e7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log +++ 
b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 277.70 ± 0.33 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.28 ± 0.41 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 679.48 ± 9.08 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.79 ± 0.14 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index a2ddf3c..fb91016 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1306.44 ± 9.57 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B 
| Vulkan | 99 | 1 | 0 | tg128 | 73.63 ± 0.06 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1376.47 ± 10.73 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 74.15 ± 0.04 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log index b322708..778904e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 365.33 ± 0.21 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.83 ± 0.07 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 388.64 ± 0.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 51.77 ± 0.12 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log index 3c9107e..ec86ef4 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log +++ 
b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 210.72 ± 0.07 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 38.53 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 228.50 ± 0.15 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 40.52 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index 795bbaa..3900f95 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | 
--------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1574.45 ± 16.63 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 79.03 ± 0.17 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1692.49 ± 16.61 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 79.78 ± 0.23 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log index c1e124d..5043525 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 545.65 ± 0.37 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 60.70 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 555.73 ± 1.50 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 61.12 ± 0.08 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log 
b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log index ee2ba5a..47e0e34 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 229.28 ± 0.91 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 50.14 ± 0.11 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 237.74 ± 1.25 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 50.13 ± 0.03 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log deleted file mode 100644 index e97dc44..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB 
| 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1512.85 ± 4.84 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.54 ± 0.15 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log deleted file mode 100644 index 8f01f6a..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.39 ± 1.12 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log deleted file mode 100644 index a44a4d5..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 72.65 ± 0.35 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.00 ± 
0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log deleted file mode 100644 index ef4d459..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1542.98 ± 7.94 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.52 ± 0.12 | - -build: f53577432 (8942) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log deleted file mode 100644 index 3bbc460..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 162.64 ± 0.31 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.64 ± 0.00 | - -build: f53577432 (8942) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log deleted file mode 100644 index 859f3de..0000000 --- 
a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 72.92 ± 0.24 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.00 ± 0.00 | - -build: ab6120cde (8997) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1.log index 0e24968..f1c5103 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1543.35 ± 8.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.50 ± 0.12 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1545.36 ± 22.91 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.59 ± 0.03 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx32768.log index 5f07961..5478ac6 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx32768.log 
+++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 159.17 ± 0.71 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.64 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 159.95 ± 1.29 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.60 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx65536.log index f85d75e..5c192e4 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx65536.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 72.03 ± 0.23 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 71.44 ± 0.29 | | llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.00 ± 0.00 | -build: 8e1f9d083 (9112) +build: 0253fb21f (9187) diff --git 
a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log index ef866a8..33d9df4 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1524.35 ± 2.98 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.78 ± 0.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1532.82 ± 13.52 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.83 ± 0.04 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log index 4667568..0374677 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 
0 | pp2048 @ d32768 | 196.44 ± 1.93 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.96 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 187.91 ± 1.55 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.94 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log index 3d8bc49..eb826b8 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB + Device 0: AMD Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 83.57 ± 0.98 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.74 ± 0.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 82.46 ± 0.59 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.76 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log index 27dfff5..ad2b078 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total 
VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1513.21 ± 2.79 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.41 ± 0.13 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1516.11 ± 11.19 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.27 ± 0.05 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log index 182d7c6..9ffdff9 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 189.28 ± 0.40 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.62 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.17 ± 1.96 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.00 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log 
b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log index c32cd3d..3ca0151 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 85.33 ± 0.30 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 2.90 ± 0.03 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 94.78 ± 1.11 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.79 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index 8d077eb..8e20b9d 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| 
llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 326.38 ± 0.44 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.44 ± 0.11 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 347.25 ± 0.61 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.69 ± 0.15 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log index 2853f6a..e88a013 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 145.14 ± 0.20 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.21 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 155.90 ± 0.09 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.42 ± 0.00 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log index 9685755..6465b83 100644 --- 
a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 89.90 ± 0.68 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 4.61 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 102.05 ± 0.15 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.12 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index 68e0eb3..02ba413 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | 
--: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1311.79 ± 0.38 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.59 ± 0.03 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1337.70 ± 0.75 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.73 ± 0.04 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log index d0a49c1..72add42 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 210.35 ± 0.60 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.53 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 216.79 ± 1.07 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.47 ± 0.01 | -build: 3f8752b55 (8743) +build: 0253fb21f (9187) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log index 6e478db..ef64030 100644 --- 
a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +ggml_vulkan: 0 = AMD Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 104.40 ± 1.78 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 4.60 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 79.73 ± 0.65 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 4.58 ± 0.00 | -build: ab6120cde (8997) +build: 0253fb21f (9187) diff --git a/benchmark/results/system_info.json b/benchmark/results/system_info.json index d9ce179..fdbbdd0 100644 --- a/benchmark/results/system_info.json +++ b/benchmark/results/system_info.json @@ -1 +1 @@ -{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", "timestamp": "10 Apr 2026"} +{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.12-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", "timestamp": "16 May 2026"} diff --git a/docs/results.json b/docs/results.json index 2f3bbe4..9988617 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,38 +1,23 @@ { "meta": { - "generated_at": "2026-05-12T11:31:47Z", + "generated_at": "2026-05-18T06:42:13Z", "system_info": { "distro": 
"Fedora Linux 43 (Workstation Edition)", - "kernel": "6.19.9-200.fc43.x86_64", + "kernel": "6.19.12-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", - "timestamp": "10 Apr 2026" + "timestamp": "16 May 2026" }, "llamacpp_builds": [ { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" }, { - "hash": "7957de9dc", - "number": "8645" - }, - { - "hash": "8e1f9d083", - "number": "9112" - }, - { - "hash": "ab6120cde", - "number": "8997" - }, - { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } ], "environments": [ - "rocm-7_2", - "rocm-7_2_2", - "rocm-7_2_2-pr21344", "rocm-7_2_3", "rocm6_4_4", "rocm7-nightlies", @@ -42,902 +27,18 @@ "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second" }, "runs": [ - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 72.06, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - 
"file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.76, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.08, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": 
"longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 7.94, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 1.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 77.83, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - 
"model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.1, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - 
"hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 8.12, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 1.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 78.4, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - 
"log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.57, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, 
- "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 8.16, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 1.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - 
"context_tokens": null, - "test": "pp512", - "tps_mean": 78.06, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 18.34, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": 
"Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 8.45, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 1.43, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": 
"results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 123.0, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 123.0, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": 
null, - "name_params_b": 123.0, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": null - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 46.91, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 7.32, - "tps_std": 0.01, - "error": false, - "error_type": null, - 
"backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 123.0, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": null - }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", 
"context_tokens": null, "test": "pp512", - "tps_mean": 403.45, - "tps_std": 2.11, + "tps_mean": 422.28, + "tps_std": 1.78, "error": false, "error_type": null, "backend": "ROCm", @@ -947,24 +48,80 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.04, + "tps_mean": 23.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 91.33, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_3", + 
"env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.56, "tps_std": 0.01, "error": false, "error_type": null, @@ -975,81 +132,25 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 87.65, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", 
- "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 46.54, - "tps_std": 0.17, + "tps_mean": 48.38, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "ROCm", @@ -1059,24 +160,24 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 11.91, + "tps_mean": 12.9, "tps_std": 0.0, "error": false, "error_type": null, @@ -1087,49 +188,80 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, + "test": "pp512", + "tps_mean": 407.51, + "tps_std": 1.36, "error": false, "error_type": null, - 
"backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, - "build": null + "build": { + "hash": "0253fb21f", + "number": "9187" + } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.0, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 87.91, + "tps_mean": 97.18, "tps_std": 0.2, "error": false, "error_type": null, @@ -1140,24 +272,24 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": 
"rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 14.84, + "tps_mean": 16.38, "tps_std": 0.0, "error": false, "error_type": null, @@ -1168,52 +300,24 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 46.47, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 11.63, + "tps_mean": 51.9, "tps_std": 0.19, "error": false, "error_type": null, @@ -1224,344 +328,11 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": 
"8997" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 403.2, - "tps_std": 2.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 20.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 87.63, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - 
"env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.31, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 46.45, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 11.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": 
"7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": false, - "error_type": null, - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 392.81, - "tps_std": 1.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 94.55, - "tps_std": 0.17, - "error": false, - "error_type": null, - 
"backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 14.89, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 50.95, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1574,8 +345,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 11.15, - "tps_std": 0.09, + "tps_mean": 12.82, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -1588,8 +359,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { 
- "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1602,8 +373,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 400.57, - "tps_std": 4.1, + "tps_mean": 424.65, + "tps_std": 3.93, "error": false, "error_type": null, "backend": "ROCm", @@ -1616,8 +387,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1630,7 +401,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 19.39, + "tps_mean": 23.3, "tps_std": 0.0, "error": false, "error_type": null, @@ -1644,8 +415,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1658,8 +429,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 86.01, - "tps_std": 0.04, + "tps_mean": 90.71, + "tps_std": 0.38, "error": false, "error_type": null, "backend": "ROCm", @@ -1672,8 +443,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1686,8 +457,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 14.86, - "tps_std": 0.03, + "tps_mean": 16.55, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -1700,8 +471,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1714,35 +485,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 45.39, - "tps_std": 0.36, - "error": 
false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 11.22, + "tps_mean": 47.61, "tps_std": 0.13, "error": false, "error_type": null, @@ -1756,8 +499,36 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 12.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1770,8 +541,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 114.94, - "tps_std": 0.15, + "tps_mean": 115.47, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "Vulkan", @@ -1784,8 +555,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": 
"0253fb21f", + "number": "9187" } }, { @@ -1798,7 +569,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 10.48, + "tps_mean": 10.78, "tps_std": 0.01, "error": false, "error_type": null, @@ -1812,8 +583,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1826,7 +597,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 10.14, + "tps_mean": 10.42, "tps_std": 0.0, "error": false, "error_type": null, @@ -1840,8 +611,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1854,7 +625,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.04, + "tps_mean": 5.4, "tps_std": 0.0, "error": false, "error_type": null, @@ -1868,8 +639,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1907,8 +678,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 326.36, - "tps_std": 4.62, + "tps_mean": 361.24, + "tps_std": 1.76, "error": false, "error_type": null, "backend": "Vulkan", @@ -1921,8 +692,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1935,8 +706,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 9.5, - "tps_std": 0.02, + "tps_mean": 9.46, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -1949,8 +720,8 @@ "log": 
"results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1963,8 +734,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 102.0, - "tps_std": 0.06, + "tps_mean": 109.96, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "Vulkan", @@ -1977,8 +748,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -1991,8 +762,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 7.82, - "tps_std": 0.02, + "tps_mean": 7.96, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -2005,8 +776,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2019,8 +790,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 61.52, - "tps_std": 0.01, + "tps_mean": 65.21, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -2033,8 +804,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2047,7 +818,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 6.57, + "tps_mean": 6.9, "tps_std": 0.0, "error": false, "error_type": null, @@ -2061,22 +832,22 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { "model": 
"GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1058.1, - "tps_std": 2.19, + "tps_mean": 913.36, + "tps_std": 47.37, "error": false, "error_type": null, "backend": "ROCm", @@ -2086,24 +857,24 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 33.18, + "tps_mean": 35.5, "tps_std": 0.0, "error": false, "error_type": null, @@ -2114,25 +885,25 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 91.21, - "tps_std": 1.08, + "tps_mean": 95.82, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", @@ -2142,25 +913,25 @@ "file_size_gib": 32.7, "name_params_b": 
29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 20.62, - "tps_std": 0.01, + "tps_mean": 21.96, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -2170,192 +941,24 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 47.63, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - 
"fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 14.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 889.16, - "tps_std": 36.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 32.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 91.01, - "tps_std": 1.51, 
- "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 47.62, + "tps_mean": 49.39, "tps_std": 0.15, "error": false, "error_type": null, @@ -2366,179 +969,11 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 14.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - 
"file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 905.99, - "tps_std": 2.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 32.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 91.59, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": 
"results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 47.51, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2551,8 +986,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 14.86, - "tps_std": 0.0, + "tps_mean": 15.86, + "tps_std": 0.2, "error": false, "error_type": null, "backend": "ROCm", @@ -2565,8 +1000,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2579,8 +1014,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - 
"tps_mean": 903.01, - "tps_std": 2.32, + "tps_mean": 929.42, + "tps_std": 1.08, "error": false, "error_type": null, "backend": "ROCm", @@ -2593,8 +1028,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2607,8 +1042,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 30.77, - "tps_std": 0.0, + "tps_mean": 33.78, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -2621,8 +1056,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2635,8 +1070,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 99.7, - "tps_std": 0.23, + "tps_mean": 102.84, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", @@ -2649,8 +1084,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2663,8 +1098,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 19.93, - "tps_std": 0.0, + "tps_mean": 21.41, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -2677,8 +1112,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2691,7 +1126,35 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 51.79, + "tps_mean": 53.18, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + 
"log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 15.67, "tps_std": 0.1, "error": false, "error_type": null, @@ -2705,36 +1168,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 14.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2747,8 +1182,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 888.7, - "tps_std": 38.05, + "tps_mean": 931.06, + "tps_std": 6.95, "error": false, "error_type": null, "backend": "ROCm", @@ -2761,8 +1196,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2775,7 +1210,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 32.81, + "tps_mean": 35.52, "tps_std": 0.0, "error": false, "error_type": null, @@ -2789,8 +1224,8 @@ "log": 
"results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2803,8 +1238,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 89.53, - "tps_std": 0.09, + "tps_mean": 94.94, + "tps_std": 0.61, "error": false, "error_type": null, "backend": "ROCm", @@ -2817,8 +1252,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2831,7 +1266,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 20.47, + "tps_mean": 22.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -2845,8 +1280,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2859,8 +1294,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 46.43, - "tps_std": 0.14, + "tps_mean": 48.9, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", @@ -2873,8 +1308,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2887,8 +1322,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 14.43, - "tps_std": 0.0, + "tps_mean": 15.99, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -2901,8 +1336,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2915,8 +1350,8 @@ "context": 
"default", "context_tokens": null, "test": "pp512", - "tps_mean": 499.74, - "tps_std": 1.49, + "tps_mean": 543.44, + "tps_std": 1.1, "error": false, "error_type": null, "backend": "Vulkan", @@ -2929,8 +1364,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -2943,7 +1378,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 39.02, + "tps_mean": 39.66, "tps_std": 0.02, "error": false, "error_type": null, @@ -2957,8 +1392,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3021,8 +1456,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 875.8, - "tps_std": 2.04, + "tps_mean": 1092.07, + "tps_std": 1.21, "error": false, "error_type": null, "backend": "Vulkan", @@ -3035,8 +1470,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3049,8 +1484,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 40.19, - "tps_std": 1.39, + "tps_mean": 41.17, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -3063,8 +1498,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3077,8 +1512,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 134.1, - "tps_std": 0.07, + "tps_mean": 136.78, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -3091,8 +1526,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - 
"hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3105,8 +1540,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 21.18, - "tps_std": 0.03, + "tps_mean": 22.55, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -3119,8 +1554,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3133,8 +1568,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 72.8, - "tps_std": 0.0, + "tps_mean": 74.06, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -3147,8 +1582,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3161,7 +1596,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 14.39, + "tps_mean": 15.7, "tps_std": 0.01, "error": false, "error_type": null, @@ -3175,284 +1610,10 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 356.93, - "tps_std": 1.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", - "rpc": 
false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.97, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 65.86, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.1, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": 
"results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": null - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 236.39, - "tps_std": 1.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.64, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log", - 
"rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 64.53, - "tps_std": 0.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.04, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": 
null - }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", @@ -3463,8 +1624,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 236.96, - "tps_std": 1.25, + "tps_mean": 243.25, + "tps_std": 1.32, "error": false, "error_type": null, "backend": "ROCm", @@ -3477,8 +1638,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3491,7 +1652,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.81, + "tps_mean": 23.11, "tps_std": 0.01, "error": false, "error_type": null, @@ -3505,8 +1666,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3519,8 +1680,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 62.13, - "tps_std": 0.56, + "tps_mean": 142.92, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -3533,8 +1694,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3547,8 +1708,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.15, - "tps_std": 0.0, + "tps_mean": 6.19, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -3561,8 +1722,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3600,8 +1761,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 236.56, - "tps_std": 1.44, + 
"tps_mean": 238.97, + "tps_std": 1.17, "error": false, "error_type": null, "backend": "ROCm", @@ -3614,8 +1775,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -3628,8 +1789,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.55, - "tps_std": 0.0, + "tps_mean": 21.98, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -3642,8 +1803,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -3656,8 +1817,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 77.34, - "tps_std": 1.21, + "tps_mean": 119.83, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "ROCm", @@ -3670,8 +1831,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -3684,8 +1845,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.54, - "tps_std": 0.09, + "tps_mean": 6.73, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "ROCm", @@ -3698,8 +1859,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -3737,8 +1898,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 225.65, - "tps_std": 0.79, + "tps_mean": 235.98, + "tps_std": 0.63, "error": false, "error_type": null, "backend": "ROCm", @@ -3751,8 +1912,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log", "rpc": 
false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3765,8 +1926,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.44, - "tps_std": 0.01, + "tps_mean": 22.13, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -3779,8 +1940,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3793,8 +1954,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 64.1, - "tps_std": 0.5, + "tps_mean": 156.27, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "ROCm", @@ -3807,8 +1968,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3821,8 +1982,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.35, - "tps_std": 0.28, + "tps_mean": 6.45, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -3835,8 +1996,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3874,8 +2035,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 202.08, - "tps_std": 0.31, + "tps_mean": 205.56, + "tps_std": 0.83, "error": false, "error_type": null, "backend": "Vulkan", @@ -3888,8 +2049,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3902,7 +2063,7 @@ "context": "default", "context_tokens": null, 
"test": "tg128", - "tps_mean": 24.94, + "tps_mean": 25.91, "tps_std": 0.01, "error": false, "error_type": null, @@ -3916,8 +2077,8 @@ "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -3930,7 +2091,259 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 31.48, + "tps_mean": 30.23, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.74, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 13.9, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + 
"name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 6.67, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 242.43, + "tps_std": 0.92, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 31.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, 
+ "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 33.86, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.65, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 13.79, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, 
+ "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 8.84, "tps_std": 0.09, "error": false, "error_type": null, @@ -3941,663 +2354,25 @@ "file_size_gib": 87.2, "name_params_b": 228.69, "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.27, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q3_K_S", - "log": 
"results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": null - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 231.25, - "tps_std": 0.79, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 30.7, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 32.0, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log", 
- "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 14.47, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q3_K_S", "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log", "rpc": false, - "build": null - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 245.87, - "tps_std": 1.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log", - "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, 
- { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.72, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 81.2, - "tps_std": 1.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.69, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": 
"MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 238.13, - "tps_std": 1.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.54, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 61.41, - "tps_std": 0.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": 
"MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.64, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 203.65, - "tps_std": 1.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.07, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": 
"MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 31.64, - "tps_std": 0.19, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 230.72, - "tps_std": 8.67, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { 
- "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.48, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 32.06, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 14.12, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": 
"0253fb21f", + "number": "9187" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 745.5, - "tps_std": 9.08, + "tps_mean": 801.06, + "tps_std": 9.05, "error": false, "error_type": null, "backend": "ROCm", @@ -4607,24 +2382,24 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "1a68ec937", + "number": "9193" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 8.42, + "tps_mean": 8.5, "tps_std": 0.0, "error": false, "error_type": null, @@ -4635,25 +2410,25 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "1a68ec937", + "number": "9193" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 164.19, - "tps_std": 1.96, + "tps_mean": 
349.46, + "tps_std": 1.14, "error": false, "error_type": null, "backend": "ROCm", @@ -4663,24 +2438,24 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "1a68ec937", + "number": "9193" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 7.08, + "tps_mean": 7.12, "tps_std": 0.0, "error": false, "error_type": null, @@ -4691,25 +2466,25 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "1a68ec937", + "number": "9193" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 75.02, - "tps_std": 1.56, + "tps_mean": 209.94, + "tps_std": 1.05, "error": false, "error_type": null, "backend": "ROCm", @@ -4719,24 +2494,24 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "log": 
"results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "1a68ec937", + "number": "9193" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 6.11, + "tps_mean": 6.14, "tps_std": 0.0, "error": false, "error_type": null, @@ -4747,179 +2522,11 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 732.09, - "tps_std": 2.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - 
"params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 182.01, - "tps_std": 2.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 80.08, - "tps_std": 2.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 
13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 6.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -4932,8 +2539,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 716.84, - "tps_std": 2.31, + "tps_mean": 762.49, + "tps_std": 6.93, "error": false, "error_type": null, "backend": "ROCm", @@ -4946,8 +2553,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -4960,7 +2567,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 8.39, + "tps_mean": 8.49, "tps_std": 0.0, "error": false, "error_type": null, @@ -4974,8 +2581,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -4988,7 +2595,175 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 166.12, + "tps_mean": 318.28, + "tps_std": 0.86, + "error": false, + "error_type": null, + "backend": "ROCm", 
+ "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 188.86, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 6.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + 
"ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 808.74, + "tps_std": 14.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 395.07, "tps_std": 0.76, "error": false, "error_type": null, @@ -4999,179 +2774,11 @@ "file_size_gib": 
25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 71.24, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 6.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - 
"name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 726.92, - "tps_std": 2.95, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 162.4, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 
13.51, - "quant": "BF16", "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -5184,7 +2791,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 7.07, + "tps_mean": 7.12, "tps_std": 0.0, "error": false, "error_type": null, @@ -5198,8 +2805,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -5212,8 +2819,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 73.23, - "tps_std": 0.94, + "tps_mean": 239.54, + "tps_std": 0.33, "error": false, "error_type": null, "backend": "ROCm", @@ -5226,8 +2833,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -5240,7 +2847,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 6.11, + "tps_mean": 6.14, "tps_std": 0.0, "error": false, "error_type": null, @@ -5254,8 +2861,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -5343,8 +2950,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 164.2, - "tps_std": 0.55, + "tps_mean": 255.05, + "tps_std": 0.7, "error": false, "error_type": null, "backend": "Vulkan", @@ -5357,8 +2964,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -5371,7 
+2978,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 7.94, + "tps_mean": 7.83, "tps_std": 0.0, "error": false, "error_type": null, @@ -5385,8 +2992,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -5399,570 +3006,402 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 72.41, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 42.48, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 
25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 5.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 333.46, - "tps_std": 1.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.99, - "tps_std": 0.04, - "error": false, - 
"error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 307.47, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.6, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": 
"NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 250.79, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 15.03, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 267.67, - "tps_std": 1.7, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": 
"results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.18, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 304.86, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.79, - "tps_std": 0.04, - "error": false, - "error_type": null, - 
"backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 249.66, - "tps_std": 0.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 15.42, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - 
"env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 261.51, - "tps_std": 1.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 323.83, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": 
"NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.34, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 290.67, + "tps_mean": 79.54, "tps_std": 0.26, "error": false, "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": 
"results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 35.49, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 275.89, + "tps_std": 1.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 
120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.85, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 382.38, + "tps_std": 5.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.4, + "tps_std": 0.07, + 
"error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 348.1, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 13.96, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": 
"rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 272.39, + "tps_std": 5.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.5, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 379.64, + "tps_std": 0.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": 
"NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.15, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 333.18, + "tps_std": 0.36, + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -5973,8 +3412,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -5987,8 +3426,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 15.11, - "tps_std": 0.05, + "tps_mean": 13.88, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "ROCm", @@ -6001,8 +3440,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6015,8 +3454,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 232.54, - 
"tps_std": 0.27, + "tps_mean": 239.02, + "tps_std": 1.74, "error": false, "error_type": null, "backend": "ROCm", @@ -6029,8 +3468,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6043,8 +3482,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 15.81, - "tps_std": 0.0, + "tps_mean": 14.43, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -6057,8 +3496,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6071,8 +3510,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 284.15, - "tps_std": 0.23, + "tps_mean": 353.69, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", @@ -6085,8 +3524,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6099,8 +3538,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 15.34, - "tps_std": 0.61, + "tps_mean": 14.32, + "tps_std": 0.08, "error": false, "error_type": null, "backend": "ROCm", @@ -6113,8 +3552,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6127,35 +3566,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 242.09, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": 
"ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 15.47, + "tps_mean": 326.04, "tps_std": 0.1, "error": false, "error_type": null, @@ -6169,8 +3580,36 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.05, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6183,8 +3622,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 138.78, - "tps_std": 0.58, + "tps_mean": 160.64, + "tps_std": 0.6, "error": false, "error_type": null, "backend": "Vulkan", @@ -6197,8 +3636,8 @@ 
"log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6211,8 +3650,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.45, - "tps_std": 0.09, + "tps_mean": 14.44, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "Vulkan", @@ -6225,8 +3664,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6239,8 +3678,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 104.96, - "tps_std": 0.02, + "tps_mean": 120.39, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "Vulkan", @@ -6253,8 +3692,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6267,35 +3706,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 13.66, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": 
"pp2048 @ d65536", - "tps_mean": 85.09, + "tps_mean": 13.67, "tps_std": 0.16, "error": false, "error_type": null, @@ -6306,11 +3717,39 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 96.89, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6323,8 +3762,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 13.21, - "tps_std": 0.04, + "tps_mean": 13.3, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "Vulkan", @@ -6337,8 +3776,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6351,8 +3790,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 191.97, - "tps_std": 8.53, + "tps_mean": 270.56, + "tps_std": 2.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -6365,8 +3804,8 @@ "log": 
"results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -6379,7 +3818,119 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.7, + "tps_mean": 14.86, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 242.77, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.3, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": 
"Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 216.34, + "tps_std": 0.78, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 13.69, "tps_std": 0.35, "error": false, "error_type": null, @@ -6390,1396 +3941,108 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 185.9, - "tps_std": 0.18, - "error": false, - 
"error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 14.21, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 174.22, - "tps_std": 0.19, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": 
"NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 13.55, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2-pr21344", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1498.5, - "tps_std": 6.83, + "tps_mean": 336.69, + "tps_std": 1.88, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log", + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2-pr21344", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_3", "env_base": "rocm", - 
"env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 68.29, - "tps_std": 0.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 213.71, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.47, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": 
"Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 110.56, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 22.59, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1149.95, - "tps_std": 8.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": 
"Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 67.57, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 194.74, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.36, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": 
"Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 102.26, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 22.53, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1167.77, - "tps_std": 7.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": 
"Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 65.06, + "tps_mean": 20.12, "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 291.69, - "tps_std": 0.26, + "tps_mean": 276.15, + "tps_std": 4.08, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": 
"Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 32.81, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 159.4, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 22.26, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - 
"log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1185.86, - "tps_std": 2.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.95, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 203.21, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - 
"log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 34.77, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 102.96, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 23.26, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - 
"file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 882.68, - "tps_std": 3.83, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 80.48, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 111.69, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, 
- "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.03, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 60.42, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 17.44, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": 
"Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1145.07, - "tps_std": 8.85, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 82.16, - "tps_std": 3.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 194.38, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "Vulkan", - 
"ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 37.09, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 63.65, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 24.54, - "tps_std": 0.02, - "error": false, - 
"error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 418.75, - "tps_std": 3.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.62, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - 
"tps_mean": 207.29, - "tps_std": 0.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.34, - "tps_std": 2.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 170.79, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - 
"env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 14.19, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 314.59, - "tps_std": 2.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.96, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2", - "env_base": 
"rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 235.56, - "tps_std": 9.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.82, + "tps_mean": 17.73, "tps_std": 0.08, "error": false, "error_type": null, @@ -7790,25 +4053,25 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 152.12, - "tps_std": 0.13, + "tps_mean": 192.28, + "tps_std": 2.56, "error": false, "error_type": null, "backend": "ROCm", @@ -7818,25 +4081,25 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "log": 
"results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_2", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 14.21, - "tps_std": 0.22, + "tps_mean": 15.74, + "tps_std": 0.27, "error": false, "error_type": null, "backend": "ROCm", @@ -7846,11 +4109,11 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -7863,8 +4126,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 316.73, - "tps_std": 1.27, + "tps_mean": 336.02, + "tps_std": 1.65, "error": false, "error_type": null, "backend": "ROCm", @@ -7877,8 +4140,8 @@ "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -7891,7 +4154,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 18.86, + "tps_mean": 19.73, "tps_std": 0.02, "error": false, "error_type": null, @@ -7905,8 +4168,8 @@ "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -7919,1169 +4182,1063 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ 
d32768", - "tps_mean": 275.81, + "tps_mean": 300.11, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.47, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 202.62, + "tps_std": 1.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + 
"context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 17.3, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 338.52, + "tps_std": 3.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 20.09, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": 
"nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 272.63, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.69, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 184.62, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": 
"Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 17.52, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 202.91, + "tps_std": 1.91, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.42, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": 
"Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 115.36, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.04, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 81.21, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + 
"rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 17.29, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 308.78, + "tps_std": 4.29, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.3, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": 
"results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 224.82, + "tps_std": 0.4, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.38, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 165.14, + "tps_std": 0.48, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, 
+ "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 18.99, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 367.47, + "tps_std": 2.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 6.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": 
"results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 197.7, + "tps_std": 2.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 106.3, + "tps_std": 1.69, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": 
"1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 364.12, "tps_std": 1.62, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.9, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - 
"file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 187.01, - "tps_std": 3.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 16.91, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 318.17, - "tps_std": 2.18, - "error": false, - "error_type": null, - "backend": "ROCm", - 
"ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 19.57, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 223.61, - "tps_std": 0.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.32, - "tps_std": 0.01, - "error": false, - 
"error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 138.8, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 17.26, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - 
"test": "pp512", - "tps_mean": 182.98, - "tps_std": 1.82, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 102.84, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": 
"longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.77, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 70.58, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 16.94, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": 
"vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 241.15, - "tps_std": 7.43, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.75, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 202.99, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": 
"Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.09, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 156.41, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 18.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - 
"model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 513.08, - "tps_std": 3.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 23.46, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 379.44, - "tps_std": 1.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": 
"Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.65, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 296.43, - "tps_std": 1.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 19.6, + "tps_mean": 6.48, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "params_b": 27.32, + "file_size_gib": 33.31, + 
"name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 509.14, - "tps_std": 3.56, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 172.59, + "tps_std": 0.84, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log", + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.13, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": 
"results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 93.78, + "tps_std": 1.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 371.42, + "tps_std": 4.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": 
"1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.98, + "tps_mean": 6.51, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log", + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 391.47, - "tps_std": 0.29, + "tps_mean": 190.65, + "tps_std": 0.72, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": 
"Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 21.88, + "tps_mean": 6.14, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 264.06, - "tps_std": 0.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 20.14, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - 
"params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 538.42, - "tps_std": 12.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 412.11, - "tps_std": 0.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - 
"log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.59, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 326.1, - "tps_std": 1.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 19.05, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": 
"results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 35.0, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 35.0, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -9089,27 +5246,27 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 256.49, - "tps_std": 1.26, + "tps_mean": 105.08, + "tps_std": 0.72, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": 
"results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -9117,27 +5274,27 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 20.93, - "tps_std": 0.06, + "tps_mean": 5.82, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -9145,1170 +5302,1842 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 122.43, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": 
"results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 11.55, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 98.18, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.94, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": 
"results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 82.44, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 10.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 316.59, - "tps_std": 1.87, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": 
"results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 10.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 267.07, - "tps_std": 0.91, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.37, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": 
"results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 226.78, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.6-35B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 10.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1359.62, - "tps_std": 5.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": 
"results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 48.34, - "tps_std": 0.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 670.26, - "tps_std": 1.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.42, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": 
"results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 412.6, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 34.9, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1089.21, - "tps_std": 5.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": 
"results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 49.27, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 703.58, - "tps_std": 0.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 41.08, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - 
"hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 460.71, - "tps_std": 1.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 35.4, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1067.33, - "tps_std": 6.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": 
"Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 48.23, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 769.99, - "tps_std": 2.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 41.91, - "tps_std": 0.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": 
"rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 473.84, - "tps_std": 2.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 36.7, - "tps_std": 0.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1091.62, - "tps_std": 10.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - 
"fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 49.1, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 664.89, - "tps_std": 1.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 42.73, - "tps_std": 0.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - 
"context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 432.39, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 38.08, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 663.94, - "tps_std": 2.8, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - 
"tps_mean": 57.13, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 292.49, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 43.97, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 183.42, - "tps_std": 
0.98, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 36.62, + "tps_mean": 80.22, "tps_std": 0.05, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 5.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 60.84, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, 
+ "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 48.97, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + 
"name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 274.97, + "tps_std": 0.46, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 6.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 123.12, + "tps_std": 0.56, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + 
"rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 63.14, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-27B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-27B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.32, + "file_size_gib": 33.31, + "name_params_b": 27.32, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-27B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { 
+ "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 525.94, + "tps_std": 13.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.81, + "file_size_gib": 64.6, "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 417.86, + "tps_std": 0.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": 
"Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 23.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 322.65, + "tps_std": 7.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 22.2, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": 
"rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 573.71, + "tps_std": 4.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 426.65, + "tps_std": 0.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": 
"longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 23.77, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 343.79, + "tps_std": 1.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 22.22, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": 
null, + "test": "pp512", + "tps_mean": 528.97, + "tps_std": 5.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 409.92, + "tps_std": 1.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + 
"tps_mean": 23.96, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 314.48, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 22.6, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 
122.89, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 11.6, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 100.51, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.93, + "tps_std": 0.01, + "error": 
false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 85.86, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 10.46, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 328.4, + "tps_std": 1.64, + "error": false, + 
"error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.68, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 275.8, + "tps_std": 0.89, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + 
"mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 241.42, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 9.94, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1120.0, + "tps_std": 8.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + 
"file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.53, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 746.6, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 43.84, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": 
"results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 493.36, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 38.39, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1116.91, + "tps_std": 7.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + 
"build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.32, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 807.33, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 43.47, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": 
"Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 547.71, + "tps_std": 1.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 38.47, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1085.14, + "tps_std": 11.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": 
"Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.47, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 716.94, + "tps_std": 1.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 44.45, + "tps_std": 0.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": 
"rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 468.89, + "tps_std": 0.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 39.3, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 696.54, + "tps_std": 2.95, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", 
+ "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 59.57, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 319.31, + "tps_std": 0.43, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 45.3, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": 
"longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 207.54, + "tps_std": 0.69, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, "quant": "Q4_K_XL", "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 36.74, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" } }, { @@ -10321,22 +7150,22 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1045.16, - "tps_std": 5.79, + "tps_mean": 1113.64, + "tps_std": 7.57, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.81, - "name_params_b": 34.66, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, "quant": "Q4_K_XL", "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { @@ -10349,27 +7178,867 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 58.82, - "tps_std": 0.08, + "tps_mean": 60.43, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + 
"mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 715.52, + "tps_std": 1.18, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 49.21, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 524.7, + "tps_std": 1.2, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 
21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 43.23, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 35.51, + "file_size_gib": 21.27, + "name_params_b": 35.51, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1093.92, + "tps_std": 7.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.18, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": 
"results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 728.92, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.0, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 486.17, + "tps_std": 0.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", + 
"rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.45, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1095.53, + "tps_std": 11.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.13, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", 
+ "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 787.54, + "tps_std": 0.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 39.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 537.23, + "tps_std": 1.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": 
"rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.72, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1088.56, + "tps_std": 10.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.21, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + 
"context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 704.46, + "tps_std": 1.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.54, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 462.61, + "tps_std": 1.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": 
"longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 36.31, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 640.78, + "tps_std": 2.08, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.81, + "file_size_gib": 35.8, "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.53, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + 
"model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 307.07, + "tps_std": 1.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 37.93, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 202.18, + "tps_std": 0.79, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": 
"Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.35, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1045.0, + "tps_std": 7.3, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.33, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "1a68ec937", + "number": "9193" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": 
null, @@ -10378,26 +8047,26 @@ "context_tokens": 32768, "test": "pp2048 @ d32768", "tps_mean": 682.8, - "tps_std": 1.45, + "tps_std": 0.96, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.81, + "file_size_gib": 35.8, "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -10405,27 +8074,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 48.77, - "tps_std": 0.07, + "tps_mean": 40.08, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.81, + "file_size_gib": 35.8, "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -10433,27 +8102,27 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 507.32, - "tps_std": 1.33, + "tps_mean": 504.87, + "tps_std": 1.05, 
"error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.81, + "file_size_gib": 35.8, "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -10461,7 +8130,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 42.75, + "tps_mean": 35.8, "tps_std": 0.05, "error": false, "error_type": null, @@ -10469,355 +8138,1027 @@ "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.81, + "file_size_gib": 35.8, "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "1a68ec937", + "number": "9193" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1222.77, - "tps_std": 2.88, + "tps_mean": 909.04, + "tps_std": 7.54, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - 
"file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 43.72, - "tps_std": 0.08, + "tps_mean": 24.64, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 653.49, - "tps_std": 0.18, + "tps_mean": 667.44, + "tps_std": 3.68, "error": false, "error_type": null, "backend": "ROCm", "ngl": 
99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 37.21, - "tps_std": 0.02, + "tps_mean": 21.59, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 414.31, + "tps_mean": 
446.88, + "tps_std": 2.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 20.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 893.46, + "tps_std": 6.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.37, + "tps_std": 0.0, + "error": false, + 
"error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 691.94, + "tps_std": 9.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.39, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 470.28, + "tps_std": 3.56, + "error": false, + "error_type": null, + 
"backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 20.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 870.72, + "tps_std": 80.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 24.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, 
+ "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 638.41, + "tps_std": 6.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.58, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 421.88, + "tps_std": 0.33, + "error": false, + "error_type": null, + 
"backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 20.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 123.59, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": 
"Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 60.07, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 39.84, + "tps_std": 0.03, + "error": false, + "error_type": 
null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 15.1, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 587.64, + "tps_std": 6.59, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.6, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": 
"Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 428.06, + "tps_std": 1.23, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.99, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 331.82, "tps_std": 0.61, "error": false, "error_type": null, - "backend": 
"ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 32.3, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1043.12, - "tps_std": 46.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": 
"Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.53, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 544.04, - "tps_std": 1.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 37.7, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": 
"rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 414.74, - "tps_std": 1.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 32.74, + "tps_mean": 16.12, "tps_std": 0.02, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1329.04, + "tps_std": 9.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": 
"Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 48.46, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 725.46, + "tps_std": 9.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 37.84, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": 
"results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 471.3, + "tps_std": 1.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.61, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10825,27 +9166,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1049.22, - "tps_std": 7.25, + "tps_mean": 1334.91, + "tps_std": 9.9, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": 
"results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10853,27 +9194,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 43.1, - "tps_std": 0.01, + "tps_mean": 47.74, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10881,27 +9222,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 758.76, - "tps_std": 0.99, + "tps_mean": 761.5, + "tps_std": 12.52, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 
15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10909,27 +9250,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 38.7, - "tps_std": 0.01, + "tps_mean": 37.29, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10937,27 +9278,27 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 515.64, - "tps_std": 0.13, + "tps_mean": 499.49, + "tps_std": 1.7, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": 
"Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10965,335 +9306,335 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 34.53, + "tps_mean": 35.18, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1194.14, + "tps_std": 6.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 48.47, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, 
+ "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 674.07, + "tps_std": 6.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 37.68, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 437.2, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + 
"params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.52, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 765.18, + "tps_std": 2.13, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 52.04, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 
15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 101.11, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 36.34, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 54.46, "tps_std": 0.03, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - 
"name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1036.89, - "tps_std": 9.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 510.38, - "tps_std": 2.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": 
"results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 38.18, - "tps_std": 1.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 375.11, - "tps_std": 2.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 35.02, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": 
"results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 576.25, - "tps_std": 2.16, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.25, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 273.77, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "Vulkan", 
- "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.71, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 175.02, - "tps_std": 1.28, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -11301,27 +9642,27 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 31.29, + "tps_mean": 
30.34, "tps_std": 0.05, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -11329,27 +9670,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 893.79, - "tps_std": 4.74, + "tps_mean": 1324.66, + "tps_std": 8.47, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -11357,27 +9698,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 46.05, - "tps_std": 0.07, + "tps_mean": 54.67, + "tps_std": 0.01, "error": false, "error_type": null, "backend": 
"Vulkan", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -11385,27 +9726,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 610.84, - "tps_std": 1.99, + "tps_mean": 682.46, + "tps_std": 2.1, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -11413,2163 +9754,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 39.67, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - 
"name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 468.83, - "tps_std": 1.17, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 35.46, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 35.8, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 836.74, - "tps_std": 6.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": 
"BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.95, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 619.85, - "tps_std": 6.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.38, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 
25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 408.04, - "tps_std": 3.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 19.52, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 823.65, - "tps_std": 83.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - 
"file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 628.14, - "tps_std": 5.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.15, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - 
"quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 403.29, - "tps_std": 1.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 19.35, - "tps_std": 0.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 796.06, - "tps_std": 126.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - 
"log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.68, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 628.2, - "tps_std": 5.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": 
"results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 426.61, - "tps_std": 3.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 19.55, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 839.13, - "tps_std": 7.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": 
"results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 20.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 646.67, - "tps_std": 11.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.86, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": 
"results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 441.14, - "tps_std": 2.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 17.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 844.42, - "tps_std": 19.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": 
"results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 546.66, - "tps_std": 5.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.11, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": 
"results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 345.7, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 19.16, - "tps_std": 0.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 117.96, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - 
"quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.18, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 58.94, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": 
"BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 39.57, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 14.1, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 376.52, - "tps_std": 7.37, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 
25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 351.87, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 13.71, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - 
"log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 281.32, - "tps_std": 0.7, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 13.15, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 47.02, - "name_params_b": 25.23, - "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1608.01, - "tps_std": 5.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": 
"Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 47.13, - "tps_std": 0.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 689.3, - "tps_std": 12.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 37.14, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", 
- "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 446.29, - "tps_std": 0.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 35.16, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1299.9, - "tps_std": 11.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": 
"Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 46.59, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 693.48, - "tps_std": 4.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.76, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 453.92, - "tps_std": 5.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 34.85, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1301.66, - "tps_std": 10.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 46.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 693.56, - "tps_std": 10.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.83, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", - 
"rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 450.75, - "tps_std": 0.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 34.84, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1288.38, - "tps_std": 10.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": 
"8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.58, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 721.84, - "tps_std": 10.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.17, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - 
"model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 469.64, - "tps_std": 3.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 34.31, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1219.74, - "tps_std": 15.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - 
"env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 46.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 600.42, - "tps_std": 7.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.88, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": 
"rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 374.22, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 34.77, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 720.78, - "tps_std": 2.94, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - 
"env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.21, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 100.79, + "tps_mean": 42.4, "tps_std": 0.03, "error": false, "error_type": null, @@ -13580,25 +9765,25 @@ "file_size_gib": 15.9, "name_params_b": 25.23, "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.41, - "tps_std": 0.01, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 469.95, + "tps_std": 0.4, "error": false, "error_type": null, "backend": "Vulkan", @@ -13608,24 +9793,612 @@ "file_size_gib": 15.9, "name_params_b": 25.23, "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "log": 
"results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 37.46, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1354.0, + "tps_std": 8.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 42.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": 
"Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 725.86, + "tps_std": 11.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.01, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 471.81, + "tps_std": 3.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": 
"results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1356.47, + "tps_std": 9.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + 
"build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 769.66, + "tps_std": 14.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.66, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 54.9, + "tps_mean": 504.88, + "tps_std": 0.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": 
"0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.09, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1342.08, + "tps_std": 8.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 42.32, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": 
"gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 692.34, + "tps_std": 8.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.0, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 447.95, + "tps_std": 0.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + 
"model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.16, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 669.11, + "tps_std": 1.51, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 43.93, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + 
"model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 99.13, "tps_std": 0.12, "error": false, "error_type": null, @@ -13633,19 +10406,75 @@ "ngl": 99, "mmap": 0, "params_b": 25.23, - "file_size_gib": 15.9, + "file_size_gib": 25.94, "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.25, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 53.86, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + 
"name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -13653,7 +10482,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 29.52, + "tps_mean": 27.52, "tps_std": 0.03, "error": false, "error_type": null, @@ -13661,19 +10490,19 @@ "ngl": 99, "mmap": 0, "params_b": 25.23, - "file_size_gib": 15.9, + "file_size_gib": 25.94, "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -13681,27 +10510,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1213.78, - "tps_std": 7.36, + "tps_mean": 1291.45, + "tps_std": 8.95, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 25.23, - "file_size_gib": 15.9, + "file_size_gib": 25.94, "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": 
"gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -13709,1758 +10538,274 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 53.04, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 638.76, - "tps_std": 1.76, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.93, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": 
"3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 445.0, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 36.64, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1524.44, - "tps_std": 7.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": 
"8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.68, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 689.36, - "tps_std": 8.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.67, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - 
"number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 436.86, - "tps_std": 3.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 31.55, - "tps_std": 0.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1272.69, - "tps_std": 74.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": 
"8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.31, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 696.25, - "tps_std": 7.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.95, - "tps_std": 0.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - 
"model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 452.1, - "tps_std": 0.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 29.87, - "tps_std": 3.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1301.93, - "tps_std": 18.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": 
"rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.35, + "tps_mean": 45.48, "tps_std": 0.03, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 25.23, "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 695.0, - "tps_std": 7.26, + "tps_mean": 672.93, + "tps_std": 0.29, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 25.23, "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 33.41, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - 
"params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 447.63, - "tps_std": 3.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 31.76, + "tps_mean": 36.81, "tps_std": 0.04, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 25.23, "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + 
"test": "pp2048 @ d65536", + "tps_mean": 465.71, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.9, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1303.32, - "tps_std": 8.77, + "tps_mean": 428.07, + "tps_std": 3.61, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": 
"gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 38.81, - "tps_std": 0.01, + "tps_mean": 3.55, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 727.92, - "tps_std": 8.62, + "tps_mean": 167.2, + "tps_std": 1.91, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { - 
"model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 32.52, + "tps_mean": 3.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 108.99, "tps_std": 0.35, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 451.48, - "tps_std": 1.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 30.76, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1294.4, - "tps_std": 31.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 596.78, - "tps_std": 7.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 363.83, - "tps_std": 2.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 31.68, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 589.2, - "tps_std": 1.79, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 43.18, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 97.7, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.82, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 54.04, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 26.65, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 944.96, - "tps_std": 19.24, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 43.58, - "tps_std": 1.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 579.88, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.52, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 415.94, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": 
"results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 31.99, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 25.94, - "name_params_b": 25.23, - "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 380.58, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, "params_b": 30.7, "file_size_gib": 57.18, "name_params_b": 30.7, "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, 
- "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 150.63, - "tps_std": 1.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.17, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 96.84, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 
30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 3.07, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 377.93, - "tps_std": 2.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": 
"results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 153.11, - "tps_std": 1.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.18, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 98.23, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": 
"results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 3.08, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 376.47, - "tps_std": 1.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - 
"hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 152.54, - "tps_std": 2.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15472,9 +10817,9 @@ "fa": true, "context": "longctx65536", "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 98.53, - "tps_std": 0.11, + "test": "tg32 @ d65536", + "tps_mean": 3.13, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -15487,36 +10832,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_3", - "env_base": "rocm", - 
"env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 3.08, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15529,63 +10846,7 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 360.84, - "tps_std": 1.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 154.86, + "tps_mean": 410.15, "tps_std": 2.16, "error": false, "error_type": null, @@ -15596,11 
+10857,67 @@ "file_size_gib": 57.18, "name_params_b": 30.7, "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 172.49, + "tps_std": 2.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15613,8 +10930,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.14, - "tps_std": 0.01, + "tps_mean": 3.21, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -15627,8 +10944,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ 
-15641,8 +10958,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 101.08, - "tps_std": 0.13, + "tps_mean": 114.16, + "tps_std": 0.63, "error": false, "error_type": null, "backend": "ROCm", @@ -15655,8 +10972,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15669,8 +10986,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 2.94, - "tps_std": 0.02, + "tps_mean": 3.12, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15683,8 +11000,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15697,8 +11014,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 373.84, - "tps_std": 0.93, + "tps_mean": 423.05, + "tps_std": 6.41, "error": false, "error_type": null, "backend": "ROCm", @@ -15711,8 +11028,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15725,7 +11042,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 3.41, + "tps_mean": 3.53, "tps_std": 0.0, "error": false, "error_type": null, @@ -15739,8 +11056,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15753,8 +11070,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 139.53, - "tps_std": 1.98, + "tps_mean": 158.77, + "tps_std": 2.07, "error": false, "error_type": null, "backend": 
"ROCm", @@ -15767,8 +11084,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15781,8 +11098,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.16, - "tps_std": 0.05, + "tps_mean": 3.25, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -15795,8 +11112,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15809,8 +11126,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 83.27, - "tps_std": 0.13, + "tps_mean": 102.83, + "tps_std": 0.08, "error": false, "error_type": null, "backend": "ROCm", @@ -15823,8 +11140,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15837,7 +11154,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 3.04, + "tps_mean": 3.14, "tps_std": 0.0, "error": false, "error_type": null, @@ -15851,8 +11168,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15940,8 +11257,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 79.53, - "tps_std": 0.26, + "tps_mean": 135.94, + "tps_std": 0.46, "error": false, "error_type": null, "backend": "Vulkan", @@ -15954,8 +11271,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + 
"hash": "0253fb21f", + "number": "9187" } }, { @@ -15982,8 +11299,8 @@ "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -15996,850 +11313,514 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 60.29, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 45.96, - "tps_std": 1.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", - "rpc": 
false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 3.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 293.27, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 10.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - 
"model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 136.84, - "tps_std": 1.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 91.86, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - 
"model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 7.51, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 310.54, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 10.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - 
"env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 140.71, - "tps_std": 1.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.11, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 93.57, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": 
"longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 7.52, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 309.16, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 10.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 139.29, - "tps_std": 1.61, - "error": false, - 
"error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.1, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 92.8, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 7.51, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 
30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 309.22, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 10.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 143.91, - "tps_std": 1.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": 
"results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 96.98, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 7.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": 
false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 315.72, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 10.51, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 17.46, - "name_params_b": 30.7, - "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 130.8, + "tps_mean": 92.4, "tps_std": 1.45, "error": false, "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { 
+ "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 67.88, + "tps_std": 1.23, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 3.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": 
"gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 322.69, + "tps_std": 0.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 150.43, + "tps_std": 1.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + 
"env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.13, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 101.59, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 7.54, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + 
"context_tokens": null, + "test": "pp512", + "tps_mean": 317.49, + "tps_std": 0.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 154.73, + "tps_std": 1.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.95, + "tps_std": 0.01, + "error": false, + "error_type": null, 
+ "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 106.22, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 7.38, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 313.83, + "tps_std": 2.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + 
"file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.55, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 142.53, + "tps_std": 1.4, + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -16850,8 +11831,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -16864,8 +11845,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 8.2, - "tps_std": 0.0, + "tps_mean": 8.21, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -16878,8 +11859,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -16892,8 +11873,8 @@ "context": "longctx65536", "context_tokens": 65536, 
"test": "pp2048 @ d65536", - "tps_mean": 84.96, - "tps_std": 0.07, + "tps_mean": 95.32, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "ROCm", @@ -16906,8 +11887,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -16920,7 +11901,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 7.56, + "tps_mean": 7.58, "tps_std": 0.02, "error": false, "error_type": null, @@ -16934,8 +11915,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -16948,7 +11929,7 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 69.12, + "tps_mean": 70.48, "tps_std": 0.02, "error": false, "error_type": null, @@ -16962,8 +11943,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -16976,7 +11957,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 10.71, + "tps_mean": 11.32, "tps_std": 0.0, "error": false, "error_type": null, @@ -16990,8 +11971,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -17054,8 +12035,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 244.46, - "tps_std": 0.27, + "tps_mean": 301.53, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "Vulkan", @@ -17068,8 +12049,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": 
"9187" } }, { @@ -17082,8 +12063,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 11.02, - "tps_std": 0.01, + "tps_mean": 11.45, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -17096,8 +12077,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -17110,7 +12091,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 124.32, + "tps_mean": 146.08, "tps_std": 1.04, "error": false, "error_type": null, @@ -17124,8 +12105,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -17138,7 +12119,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 8.78, + "tps_mean": 9.09, "tps_std": 0.0, "error": false, "error_type": null, @@ -17152,8 +12133,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -17166,8 +12147,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 76.03, - "tps_std": 1.52, + "tps_mean": 86.61, + "tps_std": 2.84, "error": false, "error_type": null, "backend": "Vulkan", @@ -17180,8 +12161,8 @@ "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -17194,7 +12175,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 7.78, + "tps_mean": 7.88, "tps_std": 0.0, "error": false, "error_type": null, @@ -17208,22 +12189,22 @@ "log": 
"results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 282.42, - "tps_std": 0.4, + "tps_mean": 326.41, + "tps_std": 1.5, "error": false, "error_type": null, "backend": "ROCm", @@ -17233,24 +12214,24 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 6.16, + "tps_mean": 6.17, "tps_std": 0.0, "error": false, "error_type": null, @@ -17261,25 +12242,25 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ 
d32768", - "tps_mean": 132.6, - "tps_std": 1.37, + "tps_mean": 150.02, + "tps_std": 1.17, "error": false, "error_type": null, "backend": "ROCm", @@ -17289,19 +12270,19 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -17317,25 +12298,25 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 90.15, - "tps_std": 0.22, + "tps_mean": 100.93, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", @@ -17345,24 +12326,24 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, 
{ "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 5.0, + "tps_mean": 5.01, "tps_std": 0.0, "error": false, "error_type": null, @@ -17373,25 +12354,25 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 308.17, - "tps_std": 0.62, + "tps_mean": 325.63, + "tps_std": 1.72, "error": false, "error_type": null, "backend": "ROCm", @@ -17401,501 +12382,193 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 6.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + 
"params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 155.9, + "tps_std": 2.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 106.02, + "tps_std": 0.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + 
"log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 333.66, + "tps_std": 1.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", "tps_mean": 6.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", 
- "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 138.85, - "tps_std": 1.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 93.92, - "tps_std": 0.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - 
"model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 5.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 309.61, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 6.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": 
"longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 139.33, - "tps_std": 1.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 93.3, - "tps_std": 0.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 
5.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 303.4, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 6.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 141.37, - "tps_std": 1.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - 
"file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.18, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 93.78, - "tps_std": 0.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 4.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": 
"results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 319.75, - "tps_std": 0.26, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -17908,36 +12581,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-UD-Q8_K_XL", - "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 6.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 32.6, - "name_params_b": 30.7, - "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -17950,8 +12595,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 128.0, - "tps_std": 1.3, + "tps_mean": 143.8, + "tps_std": 1.56, "error": false, "error_type": null, "backend": "ROCm", @@ -17964,8 +12609,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -17979,7 +12624,7 @@ "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 5.3, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", 
@@ -17992,8 +12637,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18006,8 +12651,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 81.58, - "tps_std": 0.14, + "tps_mean": 96.12, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "ROCm", @@ -18020,8 +12665,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18034,8 +12679,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 4.99, - "tps_std": 0.05, + "tps_mean": 5.03, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -18048,8 +12693,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18062,8 +12707,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 72.26, - "tps_std": 0.02, + "tps_mean": 73.59, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -18076,8 +12721,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18090,7 +12735,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 6.28, + "tps_mean": 6.21, "tps_std": 0.0, "error": false, "error_type": null, @@ -18104,8 +12749,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18168,8 +12813,8 @@ "context": 
"default", "context_tokens": null, "test": "pp512", - "tps_mean": 209.2, - "tps_std": 6.85, + "tps_mean": 272.1, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "Vulkan", @@ -18182,8 +12827,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18196,7 +12841,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 6.28, + "tps_mean": 6.3, "tps_std": 0.0, "error": false, "error_type": null, @@ -18210,8 +12855,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18224,8 +12869,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 112.72, - "tps_std": 1.26, + "tps_mean": 141.06, + "tps_std": 0.76, "error": false, "error_type": null, "backend": "Vulkan", @@ -18238,8 +12883,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18252,7 +12897,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.49, + "tps_mean": 5.52, "tps_std": 0.0, "error": false, "error_type": null, @@ -18266,8 +12911,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18280,8 +12925,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 71.68, - "tps_std": 0.77, + "tps_mean": 88.45, + "tps_std": 3.85, "error": false, "error_type": null, "backend": "Vulkan", @@ -18294,8 +12939,8 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", 
"rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18308,7 +12953,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 5.02, + "tps_mean": 5.05, "tps_std": 0.0, "error": false, "error_type": null, @@ -18322,22 +12967,22 @@ "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 945.82, - "tps_std": 60.17, + "tps_mean": 625.58, + "tps_std": 109.36, "error": false, "error_type": null, "backend": "ROCm", @@ -18347,192 +12992,24 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2-pr21344", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.25, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" 
- } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 311.38, - "tps_std": 1.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.94, - "tps_std": 7.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 175.99, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": 
"7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 27.44, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 634.64, - "tps_std": 3.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.7, + "tps_mean": 51.81, "tps_std": 0.04, "error": false, "error_type": null, @@ -18543,179 +13020,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": 
"gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 304.96, - "tps_std": 1.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.84, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 175.32, - "tps_std": 0.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_2", 
- "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 24.52, - "tps_std": 5.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 635.33, - "tps_std": 4.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.99, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18728,8 +13037,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 302.31, - "tps_std": 0.46, + "tps_mean": 589.25, + "tps_std": 0.34, 
"error": false, "error_type": null, "backend": "ROCm", @@ -18742,8 +13051,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18756,8 +13065,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 35.94, - "tps_std": 0.12, + "tps_mean": 36.32, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", @@ -18770,8 +13079,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18784,8 +13093,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 175.25, - "tps_std": 0.49, + "tps_mean": 371.47, + "tps_std": 2.43, "error": false, "error_type": null, "backend": "ROCm", @@ -18798,8 +13107,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18812,8 +13121,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 27.78, - "tps_std": 0.06, + "tps_mean": 26.11, + "tps_std": 3.5, "error": false, "error_type": null, "backend": "ROCm", @@ -18826,8 +13135,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18840,8 +13149,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 648.77, - "tps_std": 4.37, + "tps_mean": 675.76, + "tps_std": 4.24, "error": false, "error_type": null, "backend": "ROCm", @@ -18854,8 +13163,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", 
"rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18868,8 +13177,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 49.76, - "tps_std": 0.02, + "tps_mean": 51.02, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", @@ -18882,8 +13191,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18896,8 +13205,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 439.58, - "tps_std": 0.57, + "tps_mean": 603.27, + "tps_std": 1.54, "error": false, "error_type": null, "backend": "ROCm", @@ -18910,8 +13219,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -18924,147 +13233,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 39.77, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 273.87, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - 
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 32.17, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 641.05, - "tps_std": 2.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.73, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": 
"results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 305.81, + "tps_mean": 40.02, "tps_std": 0.24, "error": false, "error_type": null, @@ -19075,11 +13244,123 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 379.11, + "tps_std": 1.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.99, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, 
+ "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 684.87, + "tps_std": 3.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.48, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19091,9 +13372,9 @@ "fa": true, "context": "longctx32768", "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.03, - "tps_std": 0.02, + "test": "pp2048 @ d32768", + "tps_mean": 653.09, + "tps_std": 1.74, "error": false, "error_type": null, "backend": "ROCm", @@ -19106,8 +13387,36 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" + } + 
}, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 36.34, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19120,8 +13429,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 177.27, - "tps_std": 0.45, + "tps_mean": 412.83, + "tps_std": 4.5, "error": false, "error_type": null, "backend": "ROCm", @@ -19134,8 +13443,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19148,7 +13457,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 27.69, + "tps_mean": 28.15, "tps_std": 0.13, "error": false, "error_type": null, @@ -19162,8 +13471,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19176,8 +13485,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 575.01, - "tps_std": 3.11, + "tps_mean": 664.67, + "tps_std": 3.1, "error": false, "error_type": null, "backend": "Vulkan", @@ -19190,8 +13499,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" 
} }, { @@ -19204,8 +13513,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.2, - "tps_std": 0.06, + "tps_mean": 51.42, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "Vulkan", @@ -19218,8 +13527,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19232,8 +13541,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 214.22, - "tps_std": 0.5, + "tps_mean": 234.02, + "tps_std": 0.46, "error": false, "error_type": null, "backend": "Vulkan", @@ -19246,8 +13555,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19260,7 +13569,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 34.54, + "tps_mean": 35.87, "tps_std": 0.04, "error": false, "error_type": null, @@ -19274,8 +13583,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19288,8 +13597,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 129.83, - "tps_std": 0.04, + "tps_mean": 143.42, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "Vulkan", @@ -19302,8 +13611,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19316,8 +13625,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 26.17, - "tps_std": 0.07, + "tps_mean": 27.87, + "tps_std": 0.02, 
"error": false, "error_type": null, "backend": "Vulkan", @@ -19330,8 +13639,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19344,8 +13653,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 635.12, - "tps_std": 4.23, + "tps_mean": 719.91, + "tps_std": 4.36, "error": false, "error_type": null, "backend": "Vulkan", @@ -19358,8 +13667,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19372,7 +13681,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 56.31, + "tps_mean": 56.61, "tps_std": 0.08, "error": false, "error_type": null, @@ -19386,8 +13695,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19400,8 +13709,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 290.19, - "tps_std": 1.03, + "tps_mean": 307.5, + "tps_std": 0.37, "error": false, "error_type": null, "backend": "Vulkan", @@ -19414,8 +13723,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -19428,260 +13737,92 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 42.28, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": 
"results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 139.61, - "tps_std": 0.56, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 35.0, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2029.48, - "tps_std": 6.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": 
"results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.53, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 494.19, - "tps_std": 1.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": 
"7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 277.16, - "tps_std": 1.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 40.39, + "tps_mean": 43.01, "tps_std": 0.04, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 145.92, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 
59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 34.97, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1631.69, - "tps_std": 17.03, + "tps_mean": 1786.17, + "tps_std": 19.42, "error": false, "error_type": null, "backend": "ROCm", @@ -19691,24 +13832,192 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "f53577432", - "number": "8942" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2", + "env": "rocm-7_2_3", "env_base": "rocm", - "env_variant": "7_2_2", + "env_variant": "7_2_3", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 72.53, + "tps_mean": 73.05, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, 
+ "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1003.11, + "tps_std": 2.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 52.17, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 601.63, + "tps_std": 1.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": 
"results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 40.68, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1787.57, + "tps_std": 15.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.68, "tps_std": 0.1, "error": false, "error_type": null, @@ -19719,347 +14028,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2", - "env_base": 
"rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 490.56, - "tps_std": 2.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.69, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 275.76, - "tps_std": 1.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 
40.22, - "tps_std": 0.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1636.3, - "tps_std": 15.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.58, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 487.06, - "tps_std": 3.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", 
- "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.73, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 276.26, - "tps_std": 0.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 40.32, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_3__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "8e1f9d083", - "number": "9112" - } - 
}, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1694.78, - "tps_std": 17.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.19, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -20072,8 +14045,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 730.43, - "tps_std": 1.15, + "tps_mean": 1027.43, + "tps_std": 1.91, "error": false, "error_type": null, "backend": "ROCm", @@ -20086,8 +14059,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -20100,962 +14073,598 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 57.22, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": 
"results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 440.23, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 47.51, - "tps_std": 0.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1635.73, - "tps_std": 10.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": 
"gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.47, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 493.84, - "tps_std": 1.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 52.05, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": 
"nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 277.7, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 40.28, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1306.44, - "tps_std": 9.57, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.63, - "tps_std": 0.06, 
- "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 365.33, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 49.83, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 210.72, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - 
"file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 38.53, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "ab6120cde", - "number": "8997" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1574.45, - "tps_std": 16.63, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 79.03, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "rpc": false, - "build": { - 
"hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 545.65, + "tps_mean": 57.29, "tps_std": 0.37, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 60.7, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 229.28, - "tps_std": 0.91, + "tps_mean": 613.02, + "tps_std": 1.23, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": 
"results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 50.14, - "tps_std": 0.11, + "tps_mean": 47.67, + "tps_std": 0.25, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1512.85, - "tps_std": 4.84, + "tps_mean": 1812.57, + "tps_std": 16.28, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": 
"0253fb21f", + "number": "9187" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.54, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 163.39, - "tps_std": 1.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": 
"7957de9dc", - "number": "8645" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 72.65, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2-pr21344", - "env_base": "rocm", - "env_variant": "7_2_2-pr21344", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "tg32 @ d65536", - "tps_mean": 3.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1542.98, - "tps_std": 7.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2", - "env_base": "rocm", - 
"env_variant": "7_2_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.52, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 162.64, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f53577432", - "number": "8942" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", - "fa": true, - "context": "longctx65536", - "context_tokens": 65536, - "test": "pp2048 @ d65536", - "tps_mean": 72.92, + "tps_mean": 73.03, "tps_std": 0.24, "error": false, "error_type": 
null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_2", - "env_base": "rocm", - "env_variant": "7_2_2", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1124.14, + "tps_std": 2.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 52.23, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": 
"rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 679.48, + "tps_std": 9.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 3.0, - "tps_std": 0.0, + "tps_mean": 40.79, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1543.35, - "tps_std": 8.0, + "tps_mean": 1376.47, + "tps_std": 10.73, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", 
- "log": "results/llama-2-7b.Q4_0__rocm-7_2_3__fa1.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_3", - "env_base": "rocm", - "env_variant": "7_2_3", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.5, + "tps_mean": 74.15, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 388.64, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.77, "tps_std": 0.12, 
"error": false, "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 228.5, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 40.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1692.49, + "tps_std": 16.61, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 
11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 79.78, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 555.73, + "tps_std": 1.5, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 61.12, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": 
"0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 237.74, + "tps_std": 1.25, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 50.13, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1545.36, + "tps_std": 22.91, + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -21066,8 +14675,36 @@ "log": "results/llama-2-7b.Q4_0__rocm-7_2_3__fa1.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_3", + "env_base": "rocm", + "env_variant": "7_2_3", + "fa": true, + 
"context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.59, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_3__fa1.log", + "rpc": false, + "build": { + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21080,8 +14717,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 159.17, - "tps_std": 0.71, + "tps_mean": 159.95, + "tps_std": 1.29, "error": false, "error_type": null, "backend": "ROCm", @@ -21094,8 +14731,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21108,7 +14745,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.64, + "tps_mean": 5.6, "tps_std": 0.0, "error": false, "error_type": null, @@ -21122,8 +14759,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21136,8 +14773,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 72.03, - "tps_std": 0.23, + "tps_mean": 71.44, + "tps_std": 0.29, "error": false, "error_type": null, "backend": "ROCm", @@ -21150,8 +14787,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21178,8 +14815,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7_2_3__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "8e1f9d083", - "number": "9112" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21192,8 +14829,8 @@ "context": "default", "context_tokens": null, 
"test": "pp512", - "tps_mean": 1524.35, - "tps_std": 2.98, + "tps_mean": 1532.82, + "tps_std": 13.52, "error": false, "error_type": null, "backend": "ROCm", @@ -21206,8 +14843,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21220,8 +14857,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.78, - "tps_std": 0.02, + "tps_mean": 50.83, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -21234,8 +14871,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21248,8 +14885,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 196.44, - "tps_std": 1.93, + "tps_mean": 187.91, + "tps_std": 1.55, "error": false, "error_type": null, "backend": "ROCm", @@ -21262,8 +14899,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21276,7 +14913,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.96, + "tps_mean": 6.94, "tps_std": 0.0, "error": false, "error_type": null, @@ -21290,8 +14927,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21304,8 +14941,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 83.57, - "tps_std": 0.98, + "tps_mean": 82.46, + "tps_std": 0.59, "error": false, "error_type": null, "backend": "ROCm", @@ -21318,8 +14955,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - 
"number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21332,8 +14969,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 3.74, - "tps_std": 0.02, + "tps_mean": 3.76, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -21346,8 +14983,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21360,8 +14997,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1513.21, - "tps_std": 2.79, + "tps_mean": 1516.11, + "tps_std": 11.19, "error": false, "error_type": null, "backend": "ROCm", @@ -21374,8 +15011,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21388,8 +15025,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.41, - "tps_std": 0.13, + "tps_mean": 51.27, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -21402,8 +15039,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21416,8 +15053,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 189.28, - "tps_std": 0.4, + "tps_mean": 204.17, + "tps_std": 1.96, "error": false, "error_type": null, "backend": "ROCm", @@ -21430,8 +15067,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21444,7 +15081,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.62, + "tps_mean": 7.0, "tps_std": 0.0, "error": false, "error_type": 
null, @@ -21458,8 +15095,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21472,8 +15109,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 85.33, - "tps_std": 0.3, + "tps_mean": 94.78, + "tps_std": 1.11, "error": false, "error_type": null, "backend": "ROCm", @@ -21486,8 +15123,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21500,8 +15137,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 2.9, - "tps_std": 0.03, + "tps_mean": 3.79, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -21514,8 +15151,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21528,8 +15165,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 326.38, - "tps_std": 0.44, + "tps_mean": 347.25, + "tps_std": 0.61, "error": false, "error_type": null, "backend": "Vulkan", @@ -21542,8 +15179,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21556,8 +15193,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 55.44, - "tps_std": 0.11, + "tps_mean": 55.69, + "tps_std": 0.15, "error": false, "error_type": null, "backend": "Vulkan", @@ -21570,8 +15207,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21584,8 +15221,8 @@ "context": 
"longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 145.14, - "tps_std": 0.2, + "tps_mean": 155.9, + "tps_std": 0.09, "error": false, "error_type": null, "backend": "Vulkan", @@ -21598,8 +15235,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21612,7 +15249,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 10.21, + "tps_mean": 9.42, "tps_std": 0.0, "error": false, "error_type": null, @@ -21626,8 +15263,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21640,8 +15277,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 89.9, - "tps_std": 0.68, + "tps_mean": 102.05, + "tps_std": 0.15, "error": false, "error_type": null, "backend": "Vulkan", @@ -21654,8 +15291,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21668,7 +15305,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 4.61, + "tps_mean": 5.12, "tps_std": 0.0, "error": false, "error_type": null, @@ -21682,8 +15319,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21696,8 +15333,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1311.79, - "tps_std": 0.38, + "tps_mean": 1337.7, + "tps_std": 0.75, "error": false, "error_type": null, "backend": "Vulkan", @@ -21710,8 +15347,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": 
false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21724,8 +15361,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 55.59, - "tps_std": 0.03, + "tps_mean": 55.73, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -21738,8 +15375,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21752,8 +15389,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 210.35, - "tps_std": 0.6, + "tps_mean": 216.79, + "tps_std": 1.07, "error": false, "error_type": null, "backend": "Vulkan", @@ -21766,8 +15403,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21780,7 +15417,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 8.53, + "tps_mean": 8.47, "tps_std": 0.01, "error": false, "error_type": null, @@ -21794,8 +15431,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21808,8 +15445,8 @@ "context": "longctx65536", "context_tokens": 65536, "test": "pp2048 @ d65536", - "tps_mean": 104.4, - "tps_std": 1.78, + "tps_mean": 79.73, + "tps_std": 0.65, "error": false, "error_type": null, "backend": "Vulkan", @@ -21822,8 +15459,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } }, { @@ -21836,7 +15473,7 @@ "context": "longctx65536", "context_tokens": 65536, "test": "tg32 @ d65536", - "tps_mean": 4.6, + "tps_mean": 4.58, "tps_std": 0.0, "error": 
false, "error_type": null, @@ -21850,8 +15487,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log", "rpc": false, "build": { - "hash": "ab6120cde", - "number": "8997" + "hash": "0253fb21f", + "number": "9187" } } ] diff --git a/toolboxes/Dockerfile.rocm-6.4.4 b/toolboxes/Dockerfile.rocm-6.4.4 index c15f2cb..54c5c1e 100644 --- a/toolboxes/Dockerfile.rocm-6.4.4 +++ b/toolboxes/Dockerfile.rocm-6.4.4 @@ -1,24 +1,32 @@ # build stage FROM registry.fedoraproject.org/fedora:43 AS builder +# we want to keep ALL dnf downloaded packages in dnf cache, +# this is not the default, hackily add it to the config, +# assume there is not much in it. should be improved +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf + # deps + rocm toolchain (native fedora packages) -RUN dnf -y --nodocs --setopt=install_weak_deps=False \ +RUN --mount=type=cache,target=/var/cache/libdnf5 \ + dnf -y --nodocs --setopt=install_weak_deps=False \ install \ make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ rocminfo radeontop 'rocm-*' 'rocblas-*' hipblas 'hipblas-*' \ - git vim rsync sudo tar xz patch \ - && dnf clean all && rm -rf /var/cache/dnf/* + git vim rsync sudo tar xz patch # llama.cpp WORKDIR /opt/llama.cpp ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master -RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . +RUN --mount=type=cache,id=rocm-644,target=/opt/llama.cpp \ + test -d .git || git clone -b ${BRANCH} --single-branch --recursive ${REPO} . 
COPY llama-grammar.patch /tmp/llama-grammar.patch # build + install -RUN git clean -xdf \ +RUN --mount=type=cache,id=rocm-644,target=/opt/llama.cpp \ + git reset --hard \ + && git pull \ && git submodule update --recursive \ && patch -p1 < /tmp/llama-grammar.patch \ && HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ @@ -32,30 +40,33 @@ RUN git clean -xdf \ && cmake --install build --config Release # libs -RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ +RUN --mount=type=cache,id=rocm-644,target=/opt/llama.cpp \ + mkdir -p /usr/local/lib64 \ + && find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/local/lib64/ \; \ && ldconfig # runtime stage FROM registry.fedoraproject.org/fedora-minimal:43 +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf # runtime deps (native fedora rocm + utilities) -RUN microdnf -y --nodocs --setopt=install_weak_deps=0 \ +RUN --mount=type=cache,target=/var/cache/libdnf5 \ + microdnf -y --nodocs --setopt=install_weak_deps=0 \ install \ bash ca-certificates libatomic libstdc++ libgcc libgomp sudo \ rocminfo radeontop 'rocm-*' 'rocblas-*' hipblas 'hipblas-*' \ - procps-ng \ - && microdnf clean all && rm -rf /var/cache/dnf/* + procps-ng # copy COPY --from=builder /usr/local/ /usr/local/ COPY --from=builder /usr/include/rocwmma /usr/include/rocwmma -COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ # ld RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ && ldconfig \ && cp -n /usr/local/lib/libllama*.so* /usr/lib64/ 2>/dev/null || true \ + && cp -n /usr/local/lib64/libllama*.so* /usr/lib64/ 2>/dev/null || true \ && ldconfig # helper diff --git a/toolboxes/Dockerfile.rocm-7.2.3 b/toolboxes/Dockerfile.rocm-7.2.3 index 57e63a5..73389ff 100644 --- a/toolboxes/Dockerfile.rocm-7.2.3 +++ b/toolboxes/Dockerfile.rocm-7.2.3 @@ -14,16 +14,21 @@ gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key REPO EOF +# we want 
to keep ALL dnf downloaded packages in dnf cache, +# this is not the default, hackily add it to the config, +# assume there is not much in it. should be improved +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf + # deps -RUN dnf -y --nodocs --setopt=install_weak_deps=False \ +RUN --mount=type=cache,id=dnf-723,target=/var/cache/libdnf5 \ + dnf -y --nodocs --setopt=install_weak_deps=False \ --exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \ install \ make gcc cmake lld clang clang-devel compiler-rt libcurl-devel ninja-build \ rocm-llvm rocm-device-libs hip-runtime-amd hip-devel \ rocblas rocblas-devel hipblas hipblas-devel rocm-cmake libomp-devel libomp \ rocminfo radeontop \ - git-core vim sudo rsync patch \ - && dnf clean all && rm -rf /var/cache/dnf/* + git-core vim sudo rsync patch # rocm env ENV ROCM_PATH=/opt/rocm \ @@ -36,12 +41,15 @@ ENV ROCM_PATH=/opt/rocm \ WORKDIR /opt/llama.cpp ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master -RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . +RUN --mount=type=cache,id=rocm-723,target=/opt/llama.cpp \ + test -d .git || git clone -b ${BRANCH} --single-branch --recursive ${REPO} . COPY llama-grammar.patch /tmp/llama-grammar.patch # build -RUN git clean -xdf \ +RUN --mount=type=cache,id=rocm-723,target=/opt/llama.cpp \ + git reset --hard \ + && git pull \ && git submodule update --recursive \ && patch -p1 < /tmp/llama-grammar.patch \ && cmake -S . 
-B build \ @@ -59,7 +67,9 @@ RUN git clean -xdf \ && cmake --install build --config Release # libs -RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ +RUN --mount=type=cache,id=rocm-723,target=/opt/llama.cpp \ + mkdir -p /usr/local/lib64 \ + && find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/local/lib64/ \; \ && ldconfig # helper @@ -67,7 +77,8 @@ COPY gguf-vram-estimator.py /usr/local/bin/gguf-vram-estimator.py RUN chmod +x /usr/local/bin/gguf-vram-estimator.py # runtime stage -FROM registry.fedoraproject.org/fedora-minimal:43 +FROM registry.fedoraproject.org/fedora-minimal:43 AS runtime +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf # rocm 7.2.3 repo RUN <<'EOF' @@ -83,23 +94,23 @@ REPO EOF # runtime deps -RUN microdnf -y --nodocs --setopt=install_weak_deps=0 \ +RUN --mount=type=cache,id=dnf-723,target=/var/cache/libdnf5 \ + microdnf -y --nodocs --setopt=install_weak_deps=0 \ --exclude='*sdk*' --exclude='*samples*' --exclude='*-doc*' --exclude='*-docs*' \ install \ bash ca-certificates libatomic libstdc++ libgcc libgomp sudo \ hip-runtime-amd rocblas hipblas \ - rocminfo radeontop procps-ng \ - && microdnf clean all && rm -rf /var/cache/dnf/* + rocminfo radeontop procps-ng # copy COPY --from=builder /usr/local/ /usr/local/ -COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ # ld RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ && ldconfig \ && cp -n /usr/local/lib/libllama*.so* /usr/lib64/ 2>/dev/null || true \ + && cp -n /usr/local/lib64/libllama*.so* /usr/lib64/ 2>/dev/null || true \ && ldconfig # helper diff --git a/toolboxes/Dockerfile.rocm7-nightlies b/toolboxes/Dockerfile.rocm7-nightlies index 922c871..ce48205 100644 --- a/toolboxes/Dockerfile.rocm7-nightlies +++ b/toolboxes/Dockerfile.rocm7-nightlies @@ -1,16 +1,29 @@ # build FROM registry.fedoraproject.org/fedora:43 AS builder -RUN dnf -y --nodocs 
--setopt=install_weak_deps=False install \ +# we want to keep ALL dnf downloaded packages in dnf cache, +# this is not the default, hackily add it to the config, +# assume there is not much in it. should be improved +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf + +# we keep the dnf cache as a docker mount, which is shared +# between layers and also between the various dockerfiles. +# We can thus reuse the already-downloaded dnf packages +# on subsequent runs, and the cache will NOT be part of +# the resulting image, so we don't need to dnf clean. +RUN --mount=type=cache,id=dnf-nightlies,target=/var/cache/libdnf5 \ + dnf -y --nodocs --setopt=install_weak_deps=False install \ make gcc cmake lld clang clang-devel compiler-rt libcurl-devel \ - radeontop git vim patch curl ninja-build tar xz aria2c \ - && dnf clean all && rm -rf /var/cache/dnf/* + radeontop git vim patch curl ninja-build tar xz aria2c # find & fetch the latest Linux 7.x.x tarball (gfx1151) WORKDIR /tmp ARG ROCM_MAJOR_VER=7 ARG GFX=gfx1151 -RUN set -euo pipefail; \ +# We cache the downloaded tarball to avoid redownloading +# if no newer version is present. 
+RUN --mount=type=cache,id=rocm-nightlies-tmp,target=/tmp \ + set -euo pipefail; \ BASE="https://therock-nightly-tarball.s3.amazonaws.com"; \ PREFIX="therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}"; \ KEY="$(curl -s "${BASE}?list-type=2&prefix=${PREFIX}" \ @@ -18,9 +31,15 @@ RUN set -euo pipefail; \ | grep -o "therock-dist-linux-${GFX}-${ROCM_MAJOR_VER}\..*\.tar\.gz" \ | sort -V | tail -n1)"; \ echo "Latest tarball: ${KEY}"; \ - aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o therock.tar.gz -RUN mkdir -p /opt/rocm-7.0 \ - && tar xzf therock.tar.gz -C /opt/rocm-7.0 --strip-components=1 + FILENAME="therock-${KEY}.tar.gz"; \ + if [ -f "$FILENAME" ]; then \ + echo "no new build available, using cached tarball"; \ + else \ + echo "downloading..."; \ + aria2c -x 16 -s 16 -j 16 --file-allocation=none "${BASE}/${KEY}" -o "$FILENAME"; \ + fi; \ + mkdir -p /opt/rocm-7.0 \ + && tar xzf "$FILENAME" -C /opt/rocm-7.0 --strip-components=1 ENV ROCM_PATH=/opt/rocm-7.0 \ HIP_PLATFORM=amd \ @@ -33,7 +52,7 @@ ENV ROCM_PATH=/opt/rocm-7.0 \ LD_LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64:/opt/rocm-7.0/llvm/lib \ LIBRARY_PATH=/opt/rocm-7.0/lib:/opt/rocm-7.0/lib64 \ CPATH=/opt/rocm-7.0/include \ - PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig + PKG_CONFIG_PATH=/opt/rocm-7.0/lib/pkgconfig RUN printf '%s\n' \ 'export ROCM_PATH=/opt/rocm-7.0' \ @@ -55,13 +74,17 @@ RUN printf '%s\n' \ WORKDIR /opt/llama.cpp ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master -RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . \ - && git clean -xdf \ - && git submodule update --recursive +RUN --mount=type=cache,id=rocm-nightlies,target=/opt/llama.cpp \ + test -d .git || git clone -b ${BRANCH} --single-branch --recursive ${REPO} . 
COPY llama-grammar.patch /tmp/llama-grammar.patch -RUN patch -p1 < /tmp/llama-grammar.patch \ +# build +RUN --mount=type=cache,id=rocm-nightlies,target=/opt/llama.cpp \ + git reset --hard \ + && git pull \ + && git submodule update --recursive \ + && patch -p1 < /tmp/llama-grammar.patch \ && cmake -S . -B build \ -DGGML_HIP=ON \ -DAMDGPU_TARGETS=gfx1151 \ @@ -71,21 +94,29 @@ RUN patch -p1 < /tmp/llama-grammar.patch \ && cmake --build build --config Release -- -j$(nproc) \ && cmake --install build --config Release +# libs +RUN --mount=type=cache,id=rocm-nightlies,target=/opt/llama.cpp \ + mkdir -p /usr/local/lib64 \ + && find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/local/lib64/ \; \ + && ldconfig + # keep bin; drop headers/docs/static libs (retain llama.cpp for rpc binaries) -RUN find /opt/rocm-7.0 -type f -name '*.a' -delete \ +# TODO: combine with above nightly tarball download +RUN --mount=type=cache,id=rocm-nightlies,target=/opt/llama.cpp \ + find /opt/rocm-7.0 -type f -name '*.a' -delete \ && rm -rf /opt/rocm-7.0/include /opt/rocm-7.0/share \ /opt/rocm-7.0/llvm/include /opt/rocm-7.0/llvm/share # runtime FROM registry.fedoraproject.org/fedora-minimal:43 +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf -RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ - bash ca-certificates libatomic libstdc++ libgcc radeontop vim procps-ng \ - && microdnf clean all && rm -rf /var/cache/dnf/* +RUN --mount=type=cache,id=dnf-nightlies,target=/var/cache/libdnf5 \ + microdnf -y --nodocs --setopt=install_weak_deps=0 install \ + bash ca-certificates libatomic libstdc++ libgcc radeontop vim procps-ng COPY --from=builder /opt/rocm-7.0 /opt/rocm-7.0 COPY --from=builder /usr/local/ /usr/local/ -COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ COPY gguf-vram-estimator.py /usr/local/bin/ RUN chmod +x /usr/local/bin/gguf-vram-estimator.py @@ -123,6 +154,9 @@ RUN printf '%s\n' \ # make /usr/local libs visible without touching env RUN echo 
"/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ && echo "/usr/local/lib64" >> /etc/ld.so.conf.d/local.conf \ + && ldconfig \ + && cp -n /usr/local/lib/libllama*.so* /usr/lib64/ 2>/dev/null || true \ + && cp -n /usr/local/lib64/libllama*.so* /usr/lib64/ 2>/dev/null || true \ && ldconfig CMD ["/bin/bash"] diff --git a/toolboxes/Dockerfile.vulkan-amdvlk b/toolboxes/Dockerfile.vulkan-amdvlk index eb90012..fbe835e 100644 --- a/toolboxes/Dockerfile.vulkan-amdvlk +++ b/toolboxes/Dockerfile.vulkan-amdvlk @@ -1,30 +1,40 @@ # build stage FROM registry.fedoraproject.org/fedora:43 AS builder +# we want to keep ALL dnf downloaded packages in dnf cache, +# this is not the default, hackily add it to the config, +# assume there is not much in it. should be improved +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf # deps -RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ +RUN --mount=type=cache,id=dnf-amdvlk,target=/var/cache/libdnf5 \ + dnf -y --nodocs --setopt=install_weak_deps=False install \ git vim \ make gcc cmake ninja-build lld clang clang-devel compiler-rt libcurl-devel \ vulkan-loader-devel vulkaninfo mesa-vulkan-drivers \ - spirv-headers-devel radeontop glslc wget patch \ - && dnf clean all && rm -rf /var/cache/dnf/* + spirv-headers-devel radeontop glslc wget patch # amdvlk -RUN curl -L -o /tmp/amdvlk-2025.Q2.1.x86_64.rpm \ +RUN \ + --mount=type=cache,id=vulkan-radv-tmp,target=/tmp \ + --mount=type=cache,id=dnf-amdvlk,target=/var/cache/libdnf5 \ + test -f /tmp/amdvlk-2025.Q2.1.x86_64.rpm || \ + curl -L -o /tmp/amdvlk-2025.Q2.1.x86_64.rpm \ https://github.com/GPUOpen-Drivers/AMDVLK/releases/download/v-2025.Q2.1/amdvlk-2025.Q2.1.x86_64.rpm \ - && dnf -y install /tmp/amdvlk-*.rpm \ - && rm -f /tmp/amdvlk-*.rpm + && dnf -y install /tmp/amdvlk-*.rpm # llama.cpp WORKDIR /opt/llama.cpp ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master -RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . 
+RUN --mount=type=cache,id=vulkan-radv,target=/opt/llama.cpp \ + test -d .git || git clone -b ${BRANCH} --single-branch --recursive ${REPO} . COPY llama-grammar.patch /tmp/llama-grammar.patch # build -RUN git clean -xdf \ +RUN --mount=type=cache,id=vulkan-radv,target=/opt/llama.cpp \ + git reset --hard \ + && git pull \ && git submodule update --recursive \ && patch -p1 < /tmp/llama-grammar.patch \ && cmake -S . -B build -G Ninja \ @@ -35,11 +45,12 @@ RUN git clean -xdf \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_EXAMPLES=ON \ -DLLAMA_BUILD_SERVER=ON \ - && cmake --build build --config Release \ + && cmake --build build --config Release -- -j$(nproc) \ && cmake --install build --config Release # libs -RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ +RUN --mount=type=cache,id=vulkan-radv,target=/opt/llama.cpp \ + find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig # helper @@ -49,23 +60,25 @@ RUN chmod +x /usr/local/bin/gguf-vram-estimator.py # runtime stage FROM registry.fedoraproject.org/fedora-minimal:43 +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf # runtime deps -RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ +RUN --mount=type=cache,id=dnf-amdvlk,target=/var/cache/libdnf5 \ + microdnf -y --nodocs --setopt=install_weak_deps=0 install \ bash ca-certificates libatomic libstdc++ libgcc \ - vulkan-loader vulkan-loader-devel vulkaninfo mesa-vulkan-drivers radeontop procps-ng \ - && microdnf clean all && rm -rf /var/cache/dnf/* + vulkan-loader vulkan-loader-devel vulkaninfo mesa-vulkan-drivers radeontop procps-ng -# amdvlk -RUN curl -L -o /tmp/amdvlk-2025.Q2.1.x86_64.rpm \ +RUN \ + --mount=type=cache,id=vulkan-radv-tmp,target=/tmp \ + --mount=type=cache,id=dnf-amdvlk,target=/var/cache/libdnf5 \ + test -f /tmp/amdvlk-2025.Q2.1.x86_64.rpm || \ + curl -L -o /tmp/amdvlk-2025.Q2.1.x86_64.rpm \ 
https://github.com/GPUOpen-Drivers/AMDVLK/releases/download/v-2025.Q2.1/amdvlk-2025.Q2.1.x86_64.rpm \ - && microdnf -y install /tmp/amdvlk-*.rpm \ - && rm -f /tmp/amdvlk-*.rpm + && dnf -y install /tmp/amdvlk-*.rpm # copy COPY --from=builder /usr/ /usr/ COPY --from=builder /usr/local/ /usr/local/ -COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ # ld RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \ diff --git a/toolboxes/Dockerfile.vulkan-radv b/toolboxes/Dockerfile.vulkan-radv index 096fad9..a9d1e17 100644 --- a/toolboxes/Dockerfile.vulkan-radv +++ b/toolboxes/Dockerfile.vulkan-radv @@ -1,24 +1,31 @@ # build stage FROM registry.fedoraproject.org/fedora:43 AS builder +# we want to keep ALL dnf downloaded packages in dnf cache, +# this is not the default, hackily add it to the config, +# assume there is not much in it. should be improved +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf # deps -RUN dnf -y --nodocs --setopt=install_weak_deps=False install \ +RUN --mount=type=cache,id=dnf-radv,target=/var/cache/libdnf5 \ + dnf -y --nodocs --setopt=install_weak_deps=False install \ git vim \ make gcc cmake ninja-build lld clang clang-devel compiler-rt libcurl-devel \ vulkan-loader-devel vulkaninfo mesa-vulkan-drivers \ - spirv-headers-devel radeontop glslc patch \ - && dnf clean all && rm -rf /var/cache/dnf/* + spirv-headers-devel radeontop glslc patch # llama.cpp WORKDIR /opt/llama.cpp ARG REPO=https://github.com/ggerganov/llama.cpp.git ARG BRANCH=master -RUN git clone -b ${BRANCH} --single-branch --recursive ${REPO} . +RUN --mount=type=cache,id=vulkan-radv,target=/opt/llama.cpp \ + test -d .git || git clone -b ${BRANCH} --single-branch --recursive ${REPO} . COPY llama-grammar.patch /tmp/llama-grammar.patch # build -RUN git clean -xdf \ +RUN --mount=type=cache,id=vulkan-radv,target=/opt/llama.cpp \ + git reset --hard \ + && git pull \ && git submodule update --recursive \ && patch -p1 < /tmp/llama-grammar.patch \ && cmake -S . 
-B build -G Ninja \ @@ -29,11 +36,12 @@ RUN git clean -xdf \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_EXAMPLES=ON \ -DLLAMA_BUILD_SERVER=ON \ - && cmake --build build --config Release \ + && cmake --build build --config Release -- -j$(nproc) \ && cmake --install build --config Release # libs -RUN find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ +RUN --mount=type=cache,id=vulkan-radv,target=/opt/llama.cpp \ + find /opt/llama.cpp/build -type f -name 'lib*.so*' -exec cp {} /usr/lib64/ \; \ && ldconfig # helper @@ -43,17 +51,17 @@ RUN chmod +x /usr/local/bin/gguf-vram-estimator.py # runtime stage FROM registry.fedoraproject.org/fedora-minimal:43 +RUN echo 'keepcache=True' >> /etc/dnf/dnf.conf # runtime deps -RUN microdnf -y --nodocs --setopt=install_weak_deps=0 install \ +RUN --mount=type=cache,id=dnf-radv,target=/var/cache/libdnf5 \ + microdnf -y --nodocs --setopt=install_weak_deps=0 install \ bash ca-certificates libatomic libstdc++ libgcc \ - vulkan-loader vulkan-loader-devel vulkaninfo mesa-vulkan-drivers radeontop procps-ng \ - && microdnf clean all && rm -rf /var/cache/dnf/* + vulkan-loader vulkan-loader-devel vulkaninfo mesa-vulkan-drivers radeontop procps-ng # copy COPY --from=builder /usr/ /usr/ COPY --from=builder /usr/local/ /usr/local/ -COPY --from=builder /opt/llama.cpp/build/bin/rpc-* /usr/local/bin/ # ld RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf \