updated benchmarks
This commit is contained in:
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 66.52 ± 7.27 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.76 ± 0.08 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.05 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.00 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.47 ± 0.20 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.82 ± 0.07 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.47 ± 0.03 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 81.03 ± 0.03 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.07 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.37 ± 0.00 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 79.43 ± 0.03 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.80 ± 0.05 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.42 ± 0.00 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 73.64 ± 0.05 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.74 ± 0.22 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+18
@@ -0,0 +1,18 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:96: ROCm error
|
||||||
|
/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f1a3468b5a5]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a3468b96b]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a3468baef]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2d4e882) [0x7f1a37496882]
|
||||||
|
/usr/local/lib64/libggml-hip.so.0(+0x2d53c4e) [0x7f1a3749bc4e]
|
||||||
|
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1a346a2e5e]
|
||||||
|
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f1a37b75630]
|
||||||
|
/usr/local/bin/llama-bench() [0x40ae7c]
|
||||||
|
/usr/local/bin/llama-bench() [0x408bd1]
|
||||||
|
/lib64/libc.so.6(+0x35b5) [0x7f1a340215b5]
|
||||||
|
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a34021668]
|
||||||
|
/usr/local/bin/llama-bench() [0x409cf5]
|
||||||
|
✖ ! [rocm-7alpha] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 72.94 ± 2.79 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.16 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.14 ± 0.00 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 41.19 ± 7.76 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 1.87 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.37 ± 0.00 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.26 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.53 ± 0.02 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.96 ± 0.05 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 12.50 ± 0.00 |
|
||||||
|
| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.27 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.50 ± 0.01 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 27.87 ± 0.75 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.42 ± 0.06 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.27 ± 0.02 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 28.46 ± 0.36 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.44 ± 0.04 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 148.26 ± 0.07 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.54 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.21 ± 0.14 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.82 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 67.05 ± 0.01 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.90 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 66.64 ± 0.02 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.86 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.89 ± 0.24 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.81 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 18.12 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.16 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 87.66 ± 0.55 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.96 ± 0.00 |
|
||||||
|
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.39 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 797.13 ± 2.39 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 146.47 ± 5.52 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.39 ± 2.22 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 152.56 ± 6.51 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 |
|
||||||
|
|
||||||
|
build: a14b960bc (7816)
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 801.73 ± 2.77 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.31 ± 0.00 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 805.52 ± 3.18 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.32 ± 0.00 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 798.60 ± 3.84 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.77 ± 0.00 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 799.84 ± 4.89 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_cuda_init: found 1 ROCm devices:
|
||||||
|
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||||
|
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 159.82 ± 0.00 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 187.83 ± 22.96 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.19 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.52 ± 0.00 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.69 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 195.84 ± 0.06 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.56 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
ggml_vulkan: Found 1 Vulkan devices:
|
||||||
|
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||||
|
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||||
|
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 75.42 ± 0.00 |
|
||||||
|
| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.23 ± 0.00 |
|
||||||
|
|
||||||
|
build: 9c142e3a2 (7670)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user