use buildah

This commit is contained in:
Julian Wefers
2026-05-25 19:19:06 +02:00
parent 71abff5a0d
commit bb2a64e3d0
1933 changed files with 10974 additions and 31733 deletions
+53 -10
View File
@@ -11,7 +11,7 @@ on:
default: all default: all
env: env:
DOCKERHUB_REPO: docker.io/kyuz0/amd-strix-halo-toolboxes DOCKERHUB_REPO: gitea.wefers.page/julian/amd-strix-halo-toolboxes
LOCAL_PREFIX: llama LOCAL_PREFIX: llama
jobs: jobs:
@@ -63,11 +63,18 @@ jobs:
- name: Check out repository - name: Check out repository
uses: actions/checkout@v3 uses: actions/checkout@v3
- name: Log in to Docker Hub - name: Cache podman storage for ${{ matrix.backend }}
uses: docker/login-action@v2 uses: actions/cache@v5
with: with:
username: ${{ secrets.DOCKERHUB_USERNAME }} key: podman-storage-${{ matrix.backend }}
password: ${{ secrets.DOCKERHUB_TOKEN }} restore-keys: |
podman-storage-${{ matrix.backend }}
podman-storage
path: ~/.local/share/containers/storage
- name: Log in to Docker Hub
run: |
podman login -u ${{ secrets.DOCKERHUB_USERNAME}} -p ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set build timestamp - name: Set build timestamp
run: echo "BUILD_TS=$(date +%Y%m%dT%H%M%S)" >> $GITHUB_ENV run: echo "BUILD_TS=$(date +%Y%m%dT%H%M%S)" >> $GITHUB_ENV
@@ -86,12 +93,48 @@ jobs:
CHN="${DOCKERHUB_REPO}:${NAME}" CHN="${DOCKERHUB_REPO}:${NAME}"
echo "→ Building ${DF}" echo "→ Building ${DF}"
docker build --no-cache -t "${LI}" -f "${DF}" .
# we use buildah to eventually make use of pushing with
# zstd:chunked compression, which is much more efficient
# than dockers gzip format.
# --pull: ensure we use the latest version of the base image
# --squash: flatten the final result image into one single layer.
# Avoids large image sizes due to intermediate files
# that are irrelevant for the user
# --format oci: use the OCI image format, which allows for pushing with zstd:chunked
# --no-cache: Recompute every step in the dockerfile, even if the previous layer
# has not ben invalidated. Needed since we pull from ze internet.
# --cache-(to|from): pull/push the intermedia cache layers resulting from
# --mount options in the Dockerfile
# NOTE: we are mounting cache layers for dnf and pushing them. This cache
# layer is shared amongst all Dockerfiles, since they have the identical
# mount parameter. When parallel building with buildah, those cache layers
# compete. In parallel, they all pull the latest fitting cache, then maybe
# add some packages relevant to their specific variant, then afterwards push
# the cache again. When multiple buildahs push the dnf cache, they could invalidate
# the just-pushed cache of another builder instance, so some packages might
# always be missing. SOLUTION: we give each containers dnf cache an individual
# id, thus cache per variant.
buildah bud \
--pull \
--squash \
--format oci \
--no-cache \
-t "${LI}" \
-f "${DF}" \
.
echo "→ Running smoke test..."
podman run --rm "${LI}" llama version
podman run --rm "${LI}" llama-cli --help || { status=$?; echo "llama-cli exited with status $status"; [[ $status -eq 0 || $status -eq 1 || $status -eq 134 ]]; }
podman run --rm "${LI}" llama-server --help || { status=$?; echo "llama-server exited with status $status"; [[ $status -eq 0 || $status -eq 1 || $status -eq 134 ]]; }
# push with zstd:chunked compression, see https://github.com/containers/storage/blob/main/docs/containers-storage-zstd-chunked.md
echo "→ Tag & push immutable → ${IMM}" echo "→ Tag & push immutable → ${IMM}"
docker tag "${LI}" "${IMM}" buildah tag "${LI}" "${IMM}"
docker push "${IMM}" buildah push --compression-format zstd:chunked "${IMM}"
echo "→ Tag & push channel → ${CHN}" echo "→ Tag & push channel → ${CHN}"
docker tag "${IMM}" "${CHN}" buildah tag "${IMM}" "${CHN}"
docker push "${CHN}" buildah push --compression-format zstd:chunked "${CHN}"
+1
View File
@@ -1 +1,2 @@
__pycache__ __pycache__
research
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 72.93 ± 0.06 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.95 ± 0.06 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.04 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.58 ± 0.06 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.48 ± 0.03 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.51 ± 0.07 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.72 ± 0.10 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.57 ± 0.05 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.24 ± 0.10 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.67 ± 0.11 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.53 ± 0.03 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 78.28 ± 0.06 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.98 ± 0.04 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.18 ± 0.03 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 80.59 ± 0.10 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.27 ± 0.02 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 17.65 ± 0.01 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 4.94 ± 0.01 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 1.69 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 54.76 ± 11.46 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 7.15 ± 0.02 |
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.27 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 454.95 ± 1.90 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 22.26 ± 0.03 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 94.79 ± 0.56 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.48 ± 0.09 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.21 ± 1.79 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.03 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 93.83 ± 0.40 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.55 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 407.15 ± 2.05 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 21.51 ± 0.03 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 101.09 ± 0.37 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.23 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 414.23 ± 2.09 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.11 ± 0.03 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 100.06 ± 0.38 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 15.97 ± 0.45 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 489.62 ± 3.63 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.40 ± 0.02 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.48 ± 1.13 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.50 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.86 ± 2.29 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.06 ± 0.08 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.51 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 106.42 ± 0.08 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 10.87 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.09 ± 0.00 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.28 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 333.10 ± 6.48 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 9.51 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 78.99 ± 0.25 |
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.13 ± 0.02 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.34 ± 1.32 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.94 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 90.22 ± 4.88 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.35 ± 0.04 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.87 ± 1.21 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.09 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.13 ± 0.15 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.56 ± 1.34 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 947.86 ± 2.03 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 33.77 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.85 ± 1.04 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.89 ± 0.04 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 952.84 ± 2.21 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.23 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.01 ± 0.58 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.97 ± 0.04 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 983.72 ± 3.21 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.20 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 80.32 ± 1.28 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.31 ± 0.04 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 955.10 ± 4.53 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.16 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 81.34 ± 1.80 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.32 ± 0.04 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 368.78 ± 0.17 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.80 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.35 ± 0.00 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 18.75 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 877.18 ± 8.15 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.07 ± 0.78 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 90.27 ± 0.42 |
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 23.07 ± 0.03 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 48.83 ± 0.01 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 29.25 ± 0.17 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.45 ± 0.02 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.38 ± 0.03 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 29.17 ± 0.18 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 146.04 ± 0.21 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 36.22 ± 0.16 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.43 ± 0.03 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 146.83 ± 0.25 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 36.40 ± 0.23 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 155.06 ± 0.11 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.36 ± 0.61 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 151.70 ± 0.21 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.35 ± 0.67 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.02 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp512 | 21.74 ± 0.01 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg128 | 2.81 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 8.35 ± 0.01 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.36 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp512 | 99.39 ± 0.58 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg128 | 2.76 ± 0.04 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 11.79 ± 0.02 |
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.44 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 800.17 ± 1.72 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 169.18 ± 1.16 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.22 ± 2.21 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.11 ± 0.81 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 764.18 ± 1.66 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 166.22 ± 1.20 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 766.68 ± 1.07 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 164.84 ± 1.99 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 990.88 ± 3.15 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.50 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 172.42 ± 3.61 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 799.71 ± 2.09 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.19 ± 1.69 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 19.70 ± 0.00 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 8.24 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 16.69 ± 0.01 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.41 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 222.01 ± 0.94 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 7.59 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 76.47 ± 0.38 |
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.39 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 393.61 ± 2.94 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.58 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 562.85 ± 0.47 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.59 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 388.54 ± 2.76 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.61 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 564.71 ± 0.81 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.60 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1026.87 ± 6.06 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.90 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1042.36 ± 2.24 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.08 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1027.41 ± 6.28 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.05 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1038.86 ± 3.17 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.04 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1070.15 ± 5.54 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.56 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 908.79 ± 27.38 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.91 ± 0.03 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1038.67 ± 2.82 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.57 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 821.93 ± 29.40 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.92 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp512 | 676.59 ± 50.83 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg128 | 47.22 ± 0.01 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 342.52 ± 0.46 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg32 @ d32768 | 35.25 ± 0.03 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp512 | 951.76 ± 41.03 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg128 | 46.68 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 799.39 ± 0.69 |
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg32 @ d32768 | 41.15 ± 0.06 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 202.36 ± 3.50 |
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.80 ± 0.00 |
build: e0c93af2a (7938)
@@ -1,8 +0,0 @@
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.36 ± 0.87 |
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.65 ± 0.30 |
build: e0c93af2a (7938)

Some files were not shown because too many files have changed in this diff Show More