use buildah
This commit is contained in:
@@ -11,7 +11,7 @@ on:
|
||||
default: all
|
||||
|
||||
env:
|
||||
DOCKERHUB_REPO: docker.io/kyuz0/amd-strix-halo-toolboxes
|
||||
DOCKERHUB_REPO: gitea.wefers.page/julian/amd-strix-halo-toolboxes
|
||||
LOCAL_PREFIX: llama
|
||||
|
||||
jobs:
|
||||
@@ -63,11 +63,18 @@ jobs:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
- name: Cache podman storage for ${{ matrix.backend }}
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
key: podman-storage-${{ matrix.backend }}
|
||||
restore-keys: |
|
||||
podman-storage-${{ matrix.backend }}
|
||||
podman-storage
|
||||
path: ~/.local/share/containers/storage
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
run: |
|
||||
podman login -u ${{ secrets.DOCKERHUB_USERNAME}} -p ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Set build timestamp
|
||||
run: echo "BUILD_TS=$(date +%Y%m%dT%H%M%S)" >> $GITHUB_ENV
|
||||
@@ -86,12 +93,48 @@ jobs:
|
||||
CHN="${DOCKERHUB_REPO}:${NAME}"
|
||||
|
||||
echo "→ Building ${DF}"
|
||||
docker build --no-cache -t "${LI}" -f "${DF}" .
|
||||
|
||||
# we use buildah to eventually make use of pushing with
|
||||
# zstd:chunked compression, which is much more efficient
|
||||
# than Docker's gzip format.
|
||||
# --pull: ensure we use the latest version of the base image
|
||||
# --squash: flatten the final result image into one single layer.
|
||||
# Avoids large image sizes due to intermediate files
|
||||
# that are irrelevant for the user
|
||||
# --format oci: use the OCI image format, which allows for pushing with zstd:chunked
|
||||
# --no-cache: Recompute every step in the dockerfile, even if the previous layer
|
||||
# has not been invalidated. Needed since we pull from the internet.
|
||||
# --cache-(to|from): pull/push the intermediate cache layers resulting from
|
||||
# --mount options in the Dockerfile
|
||||
# NOTE: we are mounting cache layers for dnf and pushing them. This cache
|
||||
# layer is shared amongst all Dockerfiles, since they have the identical
|
||||
# mount parameter. When parallel building with buildah, those cache layers
|
||||
# compete. In parallel, they all pull the latest fitting cache, then maybe
|
||||
# add some packages relevant to their specific variant, then afterwards push
|
||||
# the cache again. When multiple buildahs push the dnf cache, they could invalidate
|
||||
# the just-pushed cache of another builder instance, so some packages might
|
||||
# always be missing. SOLUTION: we give each container's dnf cache an individual
|
||||
# id, thus cache per variant.
|
||||
buildah bud \
|
||||
--pull \
|
||||
--squash \
|
||||
--format oci \
|
||||
--no-cache \
|
||||
-t "${LI}" \
|
||||
-f "${DF}" \
|
||||
.
|
||||
|
||||
echo "→ Running smoke test..."
|
||||
podman run --rm "${LI}" llama version
|
||||
podman run --rm "${LI}" llama-cli --help || { status=$?; echo "llama-cli exited with status $status"; [[ $status -eq 0 || $status -eq 1 || $status -eq 134 ]]; }
|
||||
podman run --rm "${LI}" llama-server --help || { status=$?; echo "llama-server exited with status $status"; [[ $status -eq 0 || $status -eq 1 || $status -eq 134 ]]; }
|
||||
|
||||
# push with zstd:chunked compression, see https://github.com/containers/storage/blob/main/docs/containers-storage-zstd-chunked.md
|
||||
|
||||
echo "→ Tag & push immutable → ${IMM}"
|
||||
docker tag "${LI}" "${IMM}"
|
||||
docker push "${IMM}"
|
||||
buildah tag "${LI}" "${IMM}"
|
||||
buildah push --compression-format zstd:chunked "${IMM}"
|
||||
|
||||
echo "→ Tag & push channel → ${CHN}"
|
||||
docker tag "${IMM}" "${CHN}"
|
||||
docker push "${CHN}"
|
||||
buildah tag "${IMM}" "${CHN}"
|
||||
buildah push --compression-format zstd:chunked "${CHN}"
|
||||
|
||||
+2
-1
@@ -1 +1,2 @@
|
||||
__pycache__
|
||||
__pycache__
|
||||
research
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 72.93 ± 0.06 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.95 ± 0.06 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.04 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.58 ± 0.06 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.48 ± 0.03 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.51 ± 0.07 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.72 ± 0.10 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.57 ± 0.05 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.24 ± 0.10 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.67 ± 0.11 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.53 ± 0.03 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 78.28 ± 0.06 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.98 ± 0.04 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.18 ± 0.03 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 80.59 ± 0.10 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.27 ± 0.02 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 17.65 ± 0.01 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 4.94 ± 0.01 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 1.69 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 54.76 ± 11.46 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 7.15 ± 0.02 |
|
||||
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.27 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 454.95 ± 1.90 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 22.26 ± 0.03 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 94.79 ± 0.56 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.48 ± 0.09 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.21 ± 1.79 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.03 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 93.83 ± 0.40 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.55 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 407.15 ± 2.05 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 21.51 ± 0.03 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 101.09 ± 0.37 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.23 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 414.23 ± 2.09 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.11 ± 0.03 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 100.06 ± 0.38 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 15.97 ± 0.45 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 489.62 ± 3.63 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.40 ± 0.02 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.48 ± 1.13 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.50 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.86 ± 2.29 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.06 ± 0.08 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.51 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 106.42 ± 0.08 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 10.87 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.09 ± 0.00 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.28 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 333.10 ± 6.48 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 9.51 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 78.99 ± 0.25 |
|
||||
| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.13 ± 0.02 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.34 ± 1.32 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.94 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 90.22 ± 4.88 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.35 ± 0.04 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.87 ± 1.21 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.09 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.13 ± 0.15 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.56 ± 1.34 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 947.86 ± 2.03 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 33.77 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.85 ± 1.04 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.89 ± 0.04 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 952.84 ± 2.21 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.23 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.01 ± 0.58 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.97 ± 0.04 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 983.72 ± 3.21 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.20 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 80.32 ± 1.28 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.31 ± 0.04 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 955.10 ± 4.53 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.16 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 81.34 ± 1.80 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.32 ± 0.04 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 368.78 ± 0.17 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.80 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.35 ± 0.00 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 18.75 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 877.18 ± 8.15 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.07 ± 0.78 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 90.27 ± 0.42 |
|
||||
| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 23.07 ± 0.03 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 48.83 ± 0.01 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 29.25 ± 0.17 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.45 ± 0.02 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.38 ± 0.03 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 29.17 ± 0.18 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 146.04 ± 0.21 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 36.22 ± 0.16 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.43 ± 0.03 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 146.83 ± 0.25 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 36.40 ± 0.23 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 155.06 ± 0.11 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.36 ± 0.61 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 151.70 ± 0.21 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.35 ± 0.67 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.02 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp512 | 21.74 ± 0.01 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg128 | 2.81 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 8.35 ± 0.01 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.36 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp512 | 99.39 ± 0.58 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg128 | 2.76 ± 0.04 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 11.79 ± 0.02 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.44 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 800.17 ± 1.72 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 169.18 ± 1.16 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.22 ± 2.21 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.11 ± 0.81 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 764.18 ± 1.66 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 166.22 ± 1.20 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 766.68 ± 1.07 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 164.84 ± 1.99 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 990.88 ± 3.15 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.50 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 172.42 ± 3.61 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 799.71 ± 2.09 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.19 ± 1.69 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 19.70 ± 0.00 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 8.24 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 16.69 ± 0.01 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.41 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 222.01 ± 0.94 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 7.59 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 76.47 ± 0.38 |
|
||||
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.39 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 393.61 ± 2.94 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.58 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 562.85 ± 0.47 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.59 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 388.54 ± 2.76 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.61 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 564.71 ± 0.81 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.60 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1026.87 ± 6.06 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.90 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1042.36 ± 2.24 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.08 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1027.41 ± 6.28 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.05 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1038.86 ± 3.17 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.04 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1070.15 ± 5.54 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.56 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 908.79 ± 27.38 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.91 ± 0.03 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1038.67 ± 2.82 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.57 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 821.93 ± 29.40 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.92 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp512 | 676.59 ± 50.83 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg128 | 47.22 ± 0.01 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 342.52 ± 0.46 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg32 @ d32768 | 35.25 ± 0.03 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp512 | 951.76 ± 41.03 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg128 | 46.68 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 799.39 ± 0.69 |
|
||||
| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg32 @ d32768 | 41.15 ± 0.06 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 202.36 ± 3.50 |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.80 ± 0.00 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
-8
@@ -1,8 +0,0 @@
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.36 ± 0.87 |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.65 ± 0.30 |
|
||||
|
||||
build: e0c93af2a (7938)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user