Added Qwen-3-Next benchmarks
This commit is contained in:
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 245.79 ± 0.39 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 19.30 ± 0.09 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 301.76 ± 0.36 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.98 ± 0.02 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 249.80 ± 4.99 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 19.17 ± 0.01 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 305.05 ± 2.25 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.99 ± 0.16 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 311.74 ± 3.86 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 22.66 ± 0.25 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 313.20 ± 3.93 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.02 ± 0.30 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 320.42 ± 0.35 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.06 ± 0.13 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+4
@@ -0,0 +1,4 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 258.30 ± 2.66 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 18.49 ± 0.24 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 322.70 ± 2.10 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.45 ± 0.19 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 253.35 ± 2.88 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 18.25 ± 0.15 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 319.09 ± 2.77 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.72 ± 0.03 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 254.34 ± 2.60 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 18.06 ± 0.08 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 323.33 ± 1.75 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.37 ± 0.17 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 242.43 ± 0.28 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 18.35 ± 0.10 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 305.22 ± 3.10 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.60 ± 0.08 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
ggml_vulkan: Device memory allocation of size 614888064 failed.
|
||||
ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfHostMemory
|
||||
main: error: failed to load model '/home/kyuz0/models/UD-Q8_K_XL/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf'
|
||||
✖ ! [vulkan_amdvlk] Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 failed (exit 0)
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 324.45 ± 0.42 |
|
||||
| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 22.31 ± 0.18 |
|
||||
|
||||
build: c6f7a423c (7189)
|
||||
Reference in New Issue
Block a user