updated benchmarks
This commit is contained in:
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 167.68 ± 0.26 |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.67 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.85 ± 0.00 |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f5f7bd95565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5f7bd9592b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5f7bd95aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f5f7f04eeb2]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f5f7f054034]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f5f7bdac8ce]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f5f7f70a950]
|
||||
/usr/local/bin/llama-bench() [0x408242]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f5f7b72b5b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5f7b72b668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 170.65 ± 0.11 |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.54 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.57 ± 0.00 |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.38 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 171.42 ± 0.59 |
|
||||
| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.69 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f2015391565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f201539192b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f2015391aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f2017d0af12]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f2017d12a66]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f2017d0ffcf]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20153abde3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20183de650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20183e02e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20183e51bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20183e600e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x407edc]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f2014d275b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2014d27668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 147.75 ± 0.96 |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.08 ± 0.00 |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.67 ± 0.37 |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.59 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.60 ± 0.00 |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.34 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 151.03 ± 0.71 |
|
||||
| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f1a5d310565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a5d31092b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a5d310aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f1a5fc89f12]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28ce0d7) [0x7f1a5fc9b0d7]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28cccd1) [0x7f1a5fc99cd1]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28cb92c) [0x7f1a5fc9892c]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c645a) [0x7f1a5fc9345a]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c2f0a) [0x7f1a5fc8ff0a]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f1a5fc8efcf]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f1a5d32ade3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1a6035d650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f1a6035f2e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f1a603641bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f1a6036500e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x407edc]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f1a5cca65b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a5cca6668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.51 ± 0.07 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.96 ± 0.00 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.07 ± 0.08 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.71 ± 0.00 |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f295ddb7565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f295ddb792b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f295ddb7aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f2960686fb2]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f296068c004]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f295ddce8ce]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f2960d2e950]
|
||||
/usr/local/bin/llama-bench() [0x408242]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f295d74d5b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f295d74d668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.84 ± 0.31 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.86 ± 0.00 |
|
||||
| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.28 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f39038cd565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f39038cd92b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f39038cdaaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f3906b86eb2]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f3906b8eb36]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f3906b8c09f]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f39038e7de3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f3907243650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f39072452e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f390724a1bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f390724b00e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x407edc]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f39032635b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3903263668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 152.66 ± 0.00 |
|
||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.29 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 274.07 ± 3.25 |
|
||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 15.13 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.44 ± 0.00 |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f3efb9fa565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3efb9fa92b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3efb9faaaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f3efe2c9fb2]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f3efe2cf004]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3efba118ce]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3efe971950]
|
||||
/usr/local/bin/llama-bench() [0x408242]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f3efb3905b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3efb390668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f20b4ffb565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f20b4ffb92b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f20b4ffbaaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f20b7974f12]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f20b797ca66]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f20b7979fcf]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20b5015de3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20b8048650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20b804a2e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20b804f1bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20b805000e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x407edc]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f20b49915b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f20b4991668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fe4591ff565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fe4591ff92b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fe4591ffaaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fe45bb78f12]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fe45bb80a66]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fe45bb7dfcf]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fe459219de3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fe45c24c650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fe45c24e2e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fe45c2531bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fe45c25400e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x407edc]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7fe458b955b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7fe458b95668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 271.67 ± 1.52 |
|
||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.13 ± 0.05 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.04 ± 0.00 |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f0845525565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f084552592b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f0845525aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f08487deeb2]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f08487e4034]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f084553c8ce]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f0848e9a950]
|
||||
/usr/local/bin/llama-bench() [0x408242]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f0844ebb5b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0844ebb668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 269.91 ± 0.99 |
|
||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.11 ± 0.05 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 107.41 ± 0.00 |
|
||||
| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.67 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f6a6bb84565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f6a6bb8492b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f6a6bb84aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f6a6e4fdf12]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f6a6e505a66]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f6a6e502fcf]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f6a6bb9ede3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f6a6ebd1650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f6a6ebd32e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f6a6ebd81bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f6a6ebd900e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x40816d]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f6a6b51a5b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f6a6b51a668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 failed (exit 0)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fa8c83e4565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa8c83e492b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fa8c83e4aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fa8cad5df12]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fa8cad65a66]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fa8cad62fcf]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fa8c83fede3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa8cb431650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fa8cb4332e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fa8cb4381bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fa8cb43900e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x408087]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7fa8c7d7a5b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa8c7d7a668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 312.46 ± 3.80 |
|
||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.50 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f68ae79e565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f68ae79e92b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f68ae79eaaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f68b1a57eb2]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f68b1a5fb36]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f68b1a5d09f]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f68ae7b8de3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f68b2114650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f68b21162e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f68b211b1bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f68b211c00e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x407edc]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f68ae1345b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f68ae134668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 313.81 ± 0.68 |
|
||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.48 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.58 ± 0.00 |
|
||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.31 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 315.62 ± 2.64 |
|
||||
| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.51 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7effceeac565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7effceeac92b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7effceeacaaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7effd1825f12]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7effd182da66]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7effd182afcf]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7effceec6de3]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7effd1ef9650]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7effd1efb2e2]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7effd1f001bf]
|
||||
/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7effd1f0100e]
|
||||
/usr/local/bin/llama-bench() [0x40a3db]
|
||||
/usr/local/bin/llama-bench() [0x408087]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7effce8425b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7effce842668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.40 ± 0.48 |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.93 ± 0.23 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.58 ± 0.00 |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.43 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 142.52 ± 0.12 |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.05 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.46 ± 0.00 |
|
||||
:0:rocdevice.cpp :3588: 50932421658 us: Callback: Queue 0x7f8e6a000000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29
|
||||
Kernel Name: _ZL18flash_attn_ext_vecILi128ELi1EL9ggml_type1ELS0_1ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiiiiiiiiiiiiiliiliiiiil
|
||||
VGPU=0x94e06a0 SWq=0x7f8e6cbea000, HWq=0x7f8e6a000000, id=2
|
||||
Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0
|
||||
grid=[32, 68, 64], workgroup=[32, 4, 1]
|
||||
private_seg_size=0, group_seg_size=4352
|
||||
kernel_obj=0x7f8e6a78f180, kernarg_address=0x0x7f738bd49400
|
||||
completion_signal=0x0, correlation_id=0
|
||||
rptr=1368490, wptr=1369554
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f8e79498565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8e7949892b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f8e79498aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f8e7bd67fb2]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f8e7bd6d004]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f8e794af8ce]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f8e7c40f950]
|
||||
/usr/local/bin/llama-bench() [0x408242]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f8e78e2e5b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8e78e2e668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.69 ± 0.99 |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.07 ± 0.05 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.47 ± 0.00 |
|
||||
| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.20 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 352.23 ± 9.28 |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 192.75 ± 0.00 |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.17 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 345.22 ± 23.61 |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.84 ± 0.40 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 135.26 ± 0.00 |
|
||||
/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error
|
||||
/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f83b9245565]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f83b924592b]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f83b9245aaf]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f83bbb14fb2]
|
||||
/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f83bbb1a004]
|
||||
/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f83b925c8ce]
|
||||
/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f83bc1bc950]
|
||||
/usr/local/bin/llama-bench() [0x408242]
|
||||
/lib64/libc.so.6(+0x35b5) [0x7f83b8bdb5b5]
|
||||
/lib64/libc.so.6(__libc_start_main+0x88) [0x7f83b8bdb668]
|
||||
/usr/local/bin/llama-bench() [0x409255]
|
||||
✖ ! [rocm-7alpha-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 360.93 ± 3.44 |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.17 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 197.49 ± 0.00 |
|
||||
| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.17 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 579.57 ± 12.23 |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.33 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.50 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.86 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 575.31 ± 5.34 |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.66 ± 0.01 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.86 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.72 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.33 ± 7.18 |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.48 ± 0.01 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.69 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.79 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 669.29 ± 4.01 |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.10 ± 0.01 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.78 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.71 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 666.63 ± 5.54 |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.62 ± 0.02 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 148.47 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.94 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 676.38 ± 1.86 |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.44 ± 0.02 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.70 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.64 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 602.73 ± 3.88 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.21 ± 0.01 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.48 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.72 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 587.21 ± 4.27 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.40 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 200.93 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.69 ± 0.00 |
|
||||
|
||||
build: 12bb5c37 (7074)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 601.39 ± 7.96 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.54 ± 0.01 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.77 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.38 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.70 ± 2.25 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.59 ± 0.01 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 148.73 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.39 ± 0.00 |
|
||||
|
||||
build: 4db63cdde (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 601.34 ± 1.60 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.45 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.98 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.64 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.58 ± 4.35 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.38 ± 0.01 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.30 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.69 ± 0.00 |
|
||||
|
||||
build: 4fc43d43d (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 652.89 ± 1.70 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.10 ± 0.01 |
|
||||
|
||||
build: caca0d55c (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 110.83 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.52 ± 0.00 |
|
||||
|
||||
build: caca0d55c (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 638.38 ± 7.05 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.12 ± 0.00 |
|
||||
|
||||
build: caca0d55c (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.95 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 |
|
||||
|
||||
build: caca0d55c (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 648.39 ± 23.62 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.52 ± 0.01 |
|
||||
|
||||
build: 86f1f4411 (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 218.15 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.43 ± 0.00 |
|
||||
|
||||
build: 86f1f4411 (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 640.53 ± 6.75 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.87 ± 0.01 |
|
||||
|
||||
build: 86f1f4411 (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 207.10 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.58 ± 0.00 |
|
||||
|
||||
build: 86f1f4411 (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 650.26 ± 1.03 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.80 ± 0.01 |
|
||||
|
||||
build: f1840a25d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.22 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 |
|
||||
|
||||
build: f1840a25d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 634.84 ± 9.56 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.78 ± 0.01 |
|
||||
|
||||
build: f1840a25d (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.93 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.56 ± 0.00 |
|
||||
|
||||
build: f1840a25d (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 649.99 ± 3.07 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.58 ± 0.01 |
|
||||
|
||||
build: 677be4d78 (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.65 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.45 ± 0.00 |
|
||||
|
||||
build: 677be4d78 (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 640.61 ± 7.82 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.69 ± 0.01 |
|
||||
|
||||
build: 677be4d78 (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 171.74 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.45 ± 0.00 |
|
||||
|
||||
build: 677be4d78 (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 648.21 ± 4.33 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.01 |
|
||||
|
||||
build: b447a9a4b (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.20 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 |
|
||||
|
||||
build: b447a9a4b (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 631.07 ± 4.70 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.89 ± 0.01 |
|
||||
|
||||
build: b447a9a4b (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.72 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.55 ± 0.00 |
|
||||
|
||||
build: b447a9a4b (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 654.79 ± 1.55 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.22 ± 0.01 |
|
||||
|
||||
build: fa5c85a8b (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 237.14 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.14 ± 0.00 |
|
||||
|
||||
build: fa5c85a8b (7085)
|
||||
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 633.61 ± 5.41 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.67 ± 0.01 |
|
||||
|
||||
build: fa5c85a8b (7085)
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
||||
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
||||
ggml_cuda_init: found 1 ROCm devices:
|
||||
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
|
||||
| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 221.13 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.49 ± 0.00 |
|
||||
|
||||
build: fa5c85a8b (7085)
|
||||
@@ -0,0 +1,8 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1152.51 ± 1.98 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 45.58 ± 0.02 |
|
||||
|
||||
build: ab5783eb4 (7089)
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.90 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 19.23 ± 0.00 |
|
||||
|
||||
build: ab5783eb4 (7089)
|
||||
@@ -0,0 +1,8 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 916.61 ± 3.21 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 45.81 ± 0.01 |
|
||||
|
||||
build: 0a3857fe0 (7089)
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
ggml_vulkan: Found 1 Vulkan devices:
|
||||
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
|
||||
| model | size | params | backend | ngl | fa | mmap | test | t/s |
|
||||
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 108.80 ± 0.00 |
|
||||
| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 25.33 ± 0.00 |
|
||||
|
||||
build: 0a3857fe0 (7089)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user